Browse Source

1. If broadcast has only one input or output, skip continuous memory reassignment.

2. If broadcast has more than one input and an input comes from a variable, add a memcpy node between them. Delete the code that moved variable data to the broadcast input during davinci model run.
pull/277/head
wangxiaotian22 4 years ago
parent
commit
082f9195d4
4 changed files with 63 additions and 10 deletions
  1. +16
    -3
      ge/graph/build/memory/block_mem_assigner.cc
  2. +0
    -6
      ge/graph/load/new_model_manager/davinci_model.cc
  3. +45
    -1
      ge/graph/passes/hccl_memcpy_pass.cc
  4. +2
    -0
      ge/graph/passes/hccl_memcpy_pass.h

+ 16
- 3
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -425,6 +425,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
atomic_addr_clean_id_ = node_op_desc->GetId(); atomic_addr_clean_id_ = node_op_desc->GetId();
} }


// If the node has at most one input, continuous memory does not need to be reassigned.
bool is_input_continuous = false;
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
if (is_input_continuous && (node_op_desc->GetInputSize() <= 1)) {
(void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true);
}

for (auto &out_anchor : n->GetAllOutDataAnchors()) { for (auto &out_anchor : n->GetAllOutDataAnchors()) {
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
bool reuse_input = false; bool reuse_input = false;
@@ -928,6 +935,13 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null.");
auto node_op_desc = n->GetOpDesc(); auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null.");
// If the node has exactly one output, continuous memory does not need to be reassigned.
if (node_op_desc->GetOutputsSize() == 1) {
zero_memory_list_.emplace_back(n, kOutput, 0);
return nullptr;
}

MemoryBlock *block = nullptr; MemoryBlock *block = nullptr;
int64_t total_size = 0; int64_t total_size = 0;
int64_t memory_type = RT_MEMORY_HBM; int64_t memory_type = RT_MEMORY_HBM;
@@ -1746,9 +1760,8 @@ Status BlockMemAssigner::Assign() {


bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const {
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) ||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) ||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) ||
(node_type == HVDCALLBACKBROADCAST);
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) ||
(node_type == ASSIGN) || (node_type == HVDWAIT);
} }


bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) {


+ 0
- 6
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -1993,12 +1993,6 @@ Status DavinciModel::SyncVarData() {
RT_MEMCPY_HOST_TO_DEVICE)); RT_MEMCPY_HOST_TO_DEVICE));
} }


for (auto op_desc : variable_op_list_) {
ret =
VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, op_desc->GetName(), op_desc, mem_base_);
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_,
op_desc->GetName().c_str());
}
return ret; return ret;
} }




+ 45
- 1
ge/graph/passes/hccl_memcpy_pass.cc View File

@@ -37,6 +37,12 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
auto op_desc = node->GetOpDesc(); auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); GE_IF_BOOL_EXEC(op_desc == nullptr, continue);


Status ret = ProcessBroadcastMemcpy(graph, node);
if (ret != SUCCESS) {
GELOGE(INTERNAL_ERROR, "failed ProcessBroadcastMemcpy.");
return ret;
}

bool node_input_mutable = false; bool node_input_mutable = false;
if (!AttrUtils::HasAttr(op_desc, kInputMutable)) { if (!AttrUtils::HasAttr(op_desc, kInputMutable)) {
continue; continue;
@@ -61,7 +67,7 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
// Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared. // Memcpyasync needs to be inserted between constant (/data) and hcomallreduce to avoid constant being cleared.
NodePtr src_node = src_out_anchor->GetOwnerNode(); NodePtr src_node = src_out_anchor->GetOwnerNode();
std::string src_type = src_node->GetType(); std::string src_type = src_node->GetType();
bool check_src_type = (src_type == CONSTANTOP) || (src_type == DATA) || (src_type == CONSTANT);
bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) || (src_type == DATA) || (src_type == CONSTANT);
if (check_src_type) { if (check_src_type) {
Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor); Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
if (ret != SUCCESS) { if (ret != SUCCESS) {
@@ -82,6 +88,44 @@ Status HcclMemcpyPass::Run(ge::ComputeGraphPtr graph) {
return SUCCESS; return SUCCESS;
} }


///
/// @brief Insert memcpy nodes in front of a multi-input broadcast node.
///
/// A broadcast node with more than one input needs its input memory to be continuous, so a
/// separate feature-map buffer is allocated for those inputs. When an input comes from a
/// variable (or const/data) node, its contents would otherwise have to be moved into that
/// buffer on every step by code outside the model. To keep that move inside the model, the
/// edge from such a source to the broadcast input is rewired through ModifyEdgeConnection.
///
/// Nodes that are not HCOMBROADCAST / HVDCALLBACKBROADCAST, or that have at most one input,
/// are left untouched and SUCCESS is returned.
///
/// @param [in] graph  graph that owns the node; passed through to ModifyEdgeConnection
/// @param [in] node   candidate node to inspect
/// @return SUCCESS when nothing had to be done or every edge was rewired;
///         INTERNAL_ERROR when node / op_desc / a peer anchor / a source node is missing;
///         otherwise the failing status returned by ModifyEdgeConnection
///
Status HcclMemcpyPass::ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node) {
  // Guard against a null node before it is dereferenced below.
  if (node == nullptr) {
    GELOGE(INTERNAL_ERROR, "node is null.");
    return INTERNAL_ERROR;
  }

  auto op_desc = node->GetOpDesc();
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "node has no op_desc, node_name : %s.", node->GetName().c_str());
    return INTERNAL_ERROR;
  }

  // Only broadcast nodes with more than one input are handled here.
  const std::string node_type = node->GetType();
  const bool is_broadcast = (node_type == HCOMBROADCAST) || (node_type == HVDCALLBACKBROADCAST);
  if (!is_broadcast || op_desc->GetInputSize() <= 1) {
    return SUCCESS;
  }

  for (auto &hccl_in_anchor : node->GetAllInDataAnchors()) {
    if (hccl_in_anchor == nullptr) {
      continue;
    }

    auto src_out_anchor = hccl_in_anchor->GetPeerOutAnchor();
    if (src_out_anchor == nullptr) {
      GELOGE(INTERNAL_ERROR, "hcom op input has no peer anchor, node_name:%s", node->GetName().c_str());
      return INTERNAL_ERROR;
    }

    // The owner node is dereferenced right after, so reject a dangling anchor explicitly
    // instead of crashing on a null pointer.
    const NodePtr src_node = src_out_anchor->GetOwnerNode();
    if (src_node == nullptr) {
      GELOGE(INTERNAL_ERROR, "hcom op input peer anchor has no owner node, node_name:%s",
             node->GetName().c_str());
      return INTERNAL_ERROR;
    }

    const std::string src_type = src_node->GetType();
    const bool check_src_type = (src_type == CONSTANTOP) || (src_type == VARIABLE) ||
                                (src_type == DATA) || (src_type == CONSTANT);
    if (!check_src_type) {
      continue;
    }

    Status ret = ModifyEdgeConnection(graph, src_out_anchor, hccl_in_anchor);
    if (ret != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to modify the connection.");
      return ret;
    }
  }
  return SUCCESS;
}

/// ///
/// @brief Add MemcpyAsync Node /// @brief Add MemcpyAsync Node
/// @param [in] ge::ComputeGraphPtr graph /// @param [in] ge::ComputeGraphPtr graph


+ 2
- 0
ge/graph/passes/hccl_memcpy_pass.h View File

@@ -37,6 +37,8 @@ class HcclMemcpyPass : public GraphPass {
Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor, Status ModifyEdgeConnection(const ComputeGraphPtr &graph, const OutDataAnchorPtr &src_out_anchor,
const InDataAnchorPtr &hccl_in_anchor); const InDataAnchorPtr &hccl_in_anchor);


Status ProcessBroadcastMemcpy(const ComputeGraphPtr &graph, const NodePtr node);

std::unordered_map<std::string, uint32_t> node_num_map_; std::unordered_map<std::string, uint32_t> node_num_map_;
}; };
} // namespace ge } // namespace ge


Loading…
Cancel
Save