@@ -318,7 +318,11 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_
AddDependLife(node, node, stream_id_, depend_stream_life_, total_node_depend_stream_life);
}
}
depend_stream_life_[stream_id_] = GetLifeBegin();
// A block on a different stream cannot be reused by lifetime directly; it has to be reused through a dependence.
if (same_stream_) {
depend_stream_life_[stream_id_] = GetLifeBegin();
}
}
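To make the rule above concrete, here is a minimal, self-contained sketch of it (not part of the patch); `BlockSketch`, `CanReuseByLifeTime` and their fields are illustrative stand-ins, not the real `MemoryBlock` API:

#include <cstddef>
#include <cstdint>

// Illustrative stand-in for a memory block: only the fields needed for the rule are kept.
struct BlockSketch {
  int64_t stream_id = 0;  // stream the block was allocated on
  size_t life_end = 0;    // life time end recorded for the block
};

// A block can be reused on a plain lifetime comparison only when it lives on the same stream
// as the requester; cross-stream blocks must go through the dependence-based path
// (depend_stream_life_), which is not shown here.
bool CanReuseByLifeTime(const BlockSketch &blk, int64_t new_stream_id, size_t new_life_begin) {
  if (blk.stream_id != new_stream_id) {
    return false;  // different stream: lifetime alone is not enough
  }
  return blk.life_end < new_life_begin;
}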
size_t MemoryBlock::GetLifeEnd() {
@@ -415,6 +419,15 @@ BlockMemAssigner::~BlockMemAssigner() {
}
}
void BlockMemAssigner::MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc) {
// if there is just one input, there is no need to reassign continuous memory
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous && (node_op_desc->GetInputsSize() <= 1)) {
(void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
}
}
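As a hedged illustration of what reaches this new helper (not from the patch; `single_input_op_desc` is a hypothetical OpDescPtr): a single-input collective op that requests continuous input would be marked like this, and MarkContinuousAllocedForOneInput then sets ATTR_NAME_CONTINUOUS_INPUT_ALLOC so continuous memory is not reassigned for it.

// Hypothetical single-input op that declares continuous input.
(void)ge::AttrUtils::SetBool(single_input_op_desc, ATTR_NAME_CONTINUOUS_INPUT, true);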
void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
vector<int64_t> temp;
for (const NodePtr &n : compute_graph_->GetAllNodes()) {
@@ -425,6 +438,8 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
atomic_addr_clean_id_ = node_op_desc->GetId();
}
MarkContinuousAllocedForOneInput(node_op_desc);
for (auto &out_anchor : n->GetAllOutDataAnchors()) {
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
bool reuse_input = false;
@@ -815,14 +830,21 @@ bool BlockMemAssigner::IsContinuousOutput(const NodePtr &n) {
return false;
}
// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
auto node_desc = n->GetOpDesc();
if (node_desc == nullptr) {
GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str());
return false;
}
// if there is just one output, there is no need to reassign continuous memory
if (node_desc->GetOutputsSize() == 1) {
GELOGI("op %s output size is one, no need for continuous processing.", n->GetName().c_str());
return false;
}
// Get the continuous output type of the node, default is false
bool is_output_continuous = false;
// If GetBool fails, is_output_continuous is false.
(void)ge::AttrUtils::GetBool(node_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
if (is_output_continuous) {
@@ -928,6 +950,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
MemoryBlock *block = nullptr;
int64_t total_size = 0;
int64_t memory_type = RT_MEMORY_HBM;
@@ -1111,15 +1134,21 @@ bool IsKnownSubgraphData(const NodePtr &node) {
return node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX);
}
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory) {
void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory,
bool same_stream) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(to_release == nullptr, return, "Input parameter to_release is null.");
GE_CHK_TRUE_EXEC_INFO(to_release->ref_count_ <= 0, return, "Release memory");
GE_CHK_TRUE_EXEC_INFO(!to_release->reuse_mem_, return, "doesn't reuse memory");
--to_release->ref_count_;
if (!same_stream) {
to_release->same_stream_ = false;
}
if (to_release->ref_count_ == 0) {
to_release->SetLifeTimeEnd(life_time_);
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
if (to_release->same_stream_) {
reusable_memory.emplace_back(to_release);
AddReusableBlockCount(*to_release, reusable_block_counts_);
}
}
}
@@ -1159,10 +1188,9 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec
node_type_indexs.back().node->GetName().c_str());
if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) &&
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())) &&
(node->GetOpDesc()->GetStreamId() == block->stream_id_)) {
ReleaseMemory(block, reusable_memory);
if (block->ref_count_ == 0) {
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) {
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_));
if (block->ref_count_ == 0 && block->same_stream_) {
SetLastUsedInputMemAttr(node, in_anchor->GetIdx());
}
}
@@ -1682,10 +1710,10 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block,
op_desc->SetWorkspace(workspace_list);
}
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu]"
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
" noalignsize[%zu] life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d].", graph_name.c_str(),
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block, block->reuse_mem_,
block->continuous_block_, block->deleted_block_, node_type.ref_input);
block->continuous_block_, block->deleted_block_, block->same_stream_, node_type.ref_input);
}
void SetBlockOpMemOffset(MemoryBlock *block, bool child_block) {
@@ -1746,9 +1774,8 @@ Status BlockMemAssigner::Assign() {
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST);
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
(node_type == ASSIGN) || (node_type == HVDWAIT);
}
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {
@@ -65,6 +65,7 @@ class MemoryBlock {
stream_id_(stream_id),
deleted_block_(false),
reuse_mem_(reuse_mem),
same_stream_(true),
input_index_(0),
continuous_block_(false),
first_continuous_block_(false),
@@ -142,6 +143,7 @@ class MemoryBlock {
int64_t stream_id_;
bool deleted_block_;
bool reuse_mem_;
bool same_stream_;
uint32_t input_index_;
bool continuous_block_;
bool first_continuous_block_;
@@ -353,7 +355,7 @@ class BlockMemAssigner : public MemAssigner {
/// @return void
/// @author
///
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory);
void ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock *> &reusable_memory, bool same_stream = true);
///
/// @ingroup GE
@@ -409,6 +411,8 @@ class BlockMemAssigner : public MemAssigner {
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem);
void MarkContinuousAllocedForOneInput(OpDescPtr &node_op_desc);
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_;
std::map<std::string, uint64_t> reusable_block_counts_;
@@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() {
RT_MEMCPY_HOST_TO_DEVICE));
}
for (auto op_desc : variable_op_list_) {
ret =
VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_);
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
op_desc->GetName().c_str());
}
return ret;
}
@@ -1997,6 +1997,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
new (std::nothrow) TransOpWithoutReshapeFusionPass))
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
new (std::nothrow) TransOpBreadthFusionPass))
GE_CHK_STATUS_RET(
after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));
GE_TIMESTAMP_START(after_merge_passes);
auto ret = after_merge_passes.Run(compute_graph);
@@ -32,46 +32,152 @@ const char *const kInputMutable = "_input_mutable";
} // namespace
namespace ge {
Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
Status ret = SUCCESS;
GE_IF_BOOL_EXEC(graph == nullptr, GELOGE(PARAM_INVALID, "param [graph] must not be null."); return PARAM_INVALID);
for (const auto &node : graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str());
return INTERNAL_ERROR;
}
ret = ContinuousInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed ContinuousInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
ret = MutableInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed MutableInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
ret = P2pmemInputProcess(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed P2pmemInputProcess, node_name:%s.", node->GetName().c_str());
return ret;
}
}
return ret;
}
// If the node has the _input_mutable attr, its input memory may be modified while the op executes.
// To avoid affecting another op that executes with the same input when the data is modified,
// a memcpy node needs to be inserted between them.
// This also applies when the input is a variable or a const.
Status HcclMemcpyPass::MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
return SUCCESS;
}
if (!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable)) {
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str());
return FAILED;
}
if (!node_input_mutable) {
return SUCCESS;
}
bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
GELOGI("input mutable hcom op is:%s.", op_desc->GetName().c_str());
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);
GE_IF_BOOL_EXEC(!AttrUtils::GetBool(op_desc, kInputMutable, node_input_mutable),
GELOGE(INTERNAL_ERROR, "node:%s get attr:_input_mutable failed.", node->GetName().c_str()); return FAILED);
if (!node_input_mutable) {
int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Identity needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
continue;
}
GELOGI("hcom op is:%s.", op_desc->GetName().c_str());
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
}
return SUCCESS;
}
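As a hedged usage sketch (not part of the patch; `hcom_op_desc` is a hypothetical OpDescPtr): an HCCL op whose kernel may write into its input buffer would carry the attribute this function checks, which is what triggers the Identity insertion above.

// kInputMutable ("_input_mutable") marks ops that may modify their input memory in place.
(void)ge::AttrUtils::SetBool(hcom_op_desc, "_input_mutable", true);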
// If the broadcast input size is bigger than 1 and the input comes from a variable, then,
// because broadcast input memory has to be continuous,
// a separate feature-map memory will be allocated for the broadcast input.
// In that case, data would be moved from variable memory to the broadcast input feature-map memory on every step.
// To avoid performing that move outside the model, a memcpy node is inserted instead of the external move code.
Status HcclMemcpyPass::ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous && op_desc->GetInputsSize() > 1) {
GELOGI("continuous input op is:%s.", op_desc->GetName().c_str());
// if the input size is bigger than one, insert memcpy between var/data and the hccl op to support continuous memory allocation
for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(src_out_anchor);
int32_t src_out_anchor_size = src_out_anchor->GetPeerInDataAnchors().size();
if (src_out_anchor_size == kAnchorSize) {
// Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
NodePtr src_node = src_out_anchor->GetOwnerNode();
std::string src_type = src_node->GetType();
bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT);
if (check_src_type) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
return ret;
}
continue;
}
}
}
return SUCCESS;
}
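The effect of the edge rewrite above, as an illustrative before/after sketch (node names are hypothetical):

// before:  var_w ----------------------------> hcom_broadcast (continuous input)
// after:   var_w ----> identity_copy --------> hcom_broadcast
// The inserted Identity gives the broadcast input its own feature-map buffer, so the per-step
// copy from variable memory happens inside the model instead of outside it.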
// if the input is a variable type and the node input needs p2p memory, then a memcpy should be inserted between the two
Status HcclMemcpyPass::P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node) {
auto op_desc = node->GetOpDesc();
vector<int64_t> input_memory_types;
(void) ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_memory_types);
if (input_memory_types.empty()) {
return SUCCESS;
}
for (uint32_t index = 0; index < input_memory_types.size() && index < op_desc->GetInputsSize(); index++) {
if (input_memory_types[index] != RT_MEMORY_P2P_DDR) {
continue;
}
GELOGI("p2p input op is:%s.", op_desc->GetName().c_str());
auto hccl_in_anchor = node->GetInDataAnchor(index);
if (hccl_in_anchor == nullptr) {
continue;
}
auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
if (src_out_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
return INTERNAL_ERROR;
}
if (IsDataNode(src_out_anchor->GetOwnerNode()->GetType())) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
@@ -82,8 +188,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
return SUCCESS;
}
bool HcclMemcpyPass::IsDataNode(const std::string& node_type) {
return (node_type == CONSTANTOP) || (node_type == VARIABLE) || (node_type == DATA) || (node_type == CONSTANT);
}
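As a hedged illustration of the input that P2pmemInputProcess reacts to (not from the patch; `p2p_op_desc` is a hypothetical OpDescPtr, and SetListInt is assumed to be available in AttrUtils alongside the GetListInt used above):

// Declare that input 0 of the op must live in P2P DDR; the pass then inserts an Identity copy
// when that input comes straight from a variable/const/data node (see IsDataNode).
std::vector<int64_t> input_mem_types = {RT_MEMORY_P2P_DDR};
(void)ge::AttrUtils::SetListInt(p2p_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, input_mem_types);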
///
/// @brief Add MemcpyAsync Node
/// @brief Add Identity Node
/// @param [in] ge::ComputeGraphPtr graph
/// @param [in] ge::OutDataAnchorPtr in_node
/// @return ge::NodePtr
@@ -101,20 +211,20 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O
node_name = CheckDuplicateName(node_name);
OpDescPtr op_desc = MakeShared<OpDesc>(node_name.c_str(), IDENTITY);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "Create identity op: MakeShared op_desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: MakeShared op_desc fail.");
return nullptr;
}
GELOGI("Create identity op:%s.", op_desc->GetName().c_str());
GELOGI("Create Identity op:%s.", op_desc->GetName().c_str());
graphStatus ret = op_desc->AddInputDesc("x", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add input desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add input desc fail.");
return nullptr;
}
ret = op_desc->AddOutputDesc("y", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx()));
if (ret != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "Create identity op: add output desc fail.");
GELOGE(INTERNAL_ERROR, "Create Identity op: add output desc fail.");
return nullptr;
}
// for historical reasons, this pass cannot run after constant folding, so mark it
@@ -122,7 +232,7 @@ NodePtr HcclMemcpyPass::CreateIdentityNode(const ComputeGraphPtr &graph, const O
NodePtr memcpy_node = graph->AddNode(op_desc);
if (memcpy_node == nullptr) {
GELOGE(INTERNAL_ERROR, "Insert identity node fail.");
GELOGE(INTERNAL_ERROR, "Insert Identity node fail.");
return nullptr;
}
@@ -155,7 +265,8 @@ std::string HcclMemcpyPass::CheckDuplicateName(const std::string &node_name) {
///
Status HcclMemcpyPass::ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor) {
GELOGI("The op %s need insert memcpy async op.", src_out_anchor->GetOwnerNode()->GetName().c_str());
GELOGI("Need to insert a memcpy async op between op %s and op %s.", src_out_anchor->GetOwnerNode()->GetName().c_str(),
hccl_in_anchor->GetOwnerNode()->GetName().c_str());
NodePtr memcpy_node = CreateIdentityNode(graph, src_out_anchor);
GE_CHECK_NOTNULL(memcpy_node);
@@ -37,6 +37,14 @@ class HcclMemcpyPass : public GraphPass {
Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor);
Status ContinuousInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
Status MutableInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
Status P2pmemInputProcess(const ComputeGraphPtr &graph, const NodePtr node);
bool IsDataNode(const std::string& node_type);
std::unordered_map<std::string, uint32_t> node_num_map_;
};
} // namespace ge
@@ -60,7 +60,6 @@
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -1693,8 +1692,6 @@ Status GraphPrepare::PrepareOptimize() {
PassManager graph_pass;
try {
(void)graph_pass.AddPass("PrepareOptimize::PrunePass", new PrunePass);
// todo: temporarily put the hccl memcpy insertion into graph prepare, to prevent it from inserting memcpy multiple times
(void)graph_pass.AddPass("PrepareOptimize::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass);
} catch (std::bad_alloc &e) {
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
return INTERNAL_ERROR;
@@ -245,6 +245,11 @@ const std::string INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16";
// 0: close debug; 1: open TBE compiler; 2: open ccec compiler
const std::string OP_DEBUG_LEVEL = "ge.opDebugLevel";
// Configuration for fixing the hcombroadcast format.
// When multi-model is configured, the broadcast format should be fixed.
// 0: data multi; 1: model multi
const std::string HCOM_MULTI_MODE = "ge.hcomMultiMode";
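A hedged usage sketch (assuming this option is passed like the other GE string options in the init/session options map):

#include <map>
#include <string>

// Hypothetical helper returning GE options with multi-model hcom broadcast mode enabled.
std::map<std::string, std::string> MakeGeOptions() {
  return {{"ge.hcomMultiMode", "1"}};  // 1: model multi; 0: data multi
}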
// Graph run mode
enum GraphRunMode { PREDICTION = 0, TRAIN };