// Review notes. This text sits ahead of the first "diff --git" header and belongs to no hunk.
// Only added ("+") lines were edited; context and removed lines are untouched because they
// have to mirror the target files. The number of added lines per hunk is unchanged.
//  - graph_mem_assigner.cc: std::unique_ptr now carries its explicit template argument.
//  - memory_assigner.cc: the new AssignMemory2HasRefAttrNode() check reports its own error text
//    instead of repeating the AssignReferenceMemory() message.
//  - npu_memory_allocator.cc: the attr null check is closed before the padding computation, so
//    kDefaultPadding is also applied when attr == nullptr; size (std::size_t) is logged with %zu.
// NOTE(review): the context line "reinterpret_cast(try_reuse_addr)" appears to have lost its
// template argument in extraction - confirm against the target file before applying.
// NOTE(review): kDefaultPadding is defined outside this patch; it is assumed to be > 0.
diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 1cdb2efa..9b2da3c7 100644 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -89,6 +89,14 @@ Status VariableMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() { + Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_); + if (result != ge::SUCCESS) { + return result; + } + return ge::SUCCESS; +} + Status GraphMemoryAssigner::AssignMemory() { ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_)); if (mem_assigner->Assign() != ge::SUCCESS) { @@ -131,6 +139,19 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } +ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() { + auto variable_assigner = + std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_)); + if (variable_assigner == nullptr) { + GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed."); + return ge::FAILED; + } + if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + return ge::FAILED; + } + return ge::SUCCESS; +} + ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index 201e6d01..966cbd14 100644 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -63,6 +63,8 @@ class VariableMemoryAssigner { /// ge::Status AssignVarAttr2Nodes(); + ge::Status AssignMemory2HasRefAttrNode(); + private: ge::ComputeGraphPtr compute_graph_; }; @@ -99,6 +101,8 @@ class GraphMemoryAssigner { ge::Status ReAssignMemory(bool is_loop_graph, size_t &mem_offset); + 
ge::Status AssignMemory2HasRefAttrNode(); + ge::Status AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size); ge::Status SetInputOffset(); diff --git a/ge/graph/build/memory/memory_assigner.cc b/ge/graph/build/memory/memory_assigner.cc index e36f082e..40a72f37 100644 --- a/ge/graph/build/memory/memory_assigner.cc +++ b/ge/graph/build/memory/memory_assigner.cc @@ -40,6 +40,11 @@ Status MemoryAssigner::AssignMemory(bool is_loop_graph, size_t &mem_offset, size return ge::FAILED; } + if (graph_mem_assigner.AssignMemory2HasRefAttrNode() != ge::SUCCESS) { + GELOGE(ge::FAILED, "Assign memory to nodes with ref attr failed!"); + return ge::FAILED; + } + // Assign memory for reference if (graph_mem_assigner.AssignReferenceMemory() != ge::SUCCESS) { GELOGE(ge::FAILED, "Assign reference memory failed!"); diff --git a/ge/graph/build/memory/var_mem_assign_util.cc b/ge/graph/build/memory/var_mem_assign_util.cc index a352cf65..605f918d 100644 --- a/ge/graph/build/memory/var_mem_assign_util.cc +++ b/ge/graph/build/memory/var_mem_assign_util.cc @@ -34,7 +34,6 @@ using std::vector; namespace ge { Status VarMemAssignUtil::AssignVarMemory(ge::ComputeGraphPtr &compute_graph) { GE_CHK_STATUS_RET(AssignMemory2VariableNode(compute_graph)); - GE_CHK_STATUS_RET(AssignMemory2HasRefAttrNode(compute_graph)); return SUCCESS; } diff --git a/ge/hybrid/common/npu_memory_allocator.cc b/ge/hybrid/common/npu_memory_allocator.cc index cbb556e2..50696f29 100644 --- a/ge/hybrid/common/npu_memory_allocator.cc +++ b/ge/hybrid/common/npu_memory_allocator.cc @@ -45,16 +45,9 @@ NpuMemoryAllocator *NpuMemoryAllocator::GetAllocator() { NpuMemoryAllocator::NpuMemoryAllocator(uint32_t device_id) : device_id_(device_id) {} void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { - void *try_reuse_addr = nullptr; size_t allocate_size = size; MemStorageType mem_type = HBM; if (attr != nullptr) { - try_reuse_addr = attr->try_reuse_addr_; - if (attr->padding_ != 0) { - // padding up to multiple 
of attr->padding, and add extra attr->padding_ - allocate_size = (size + 2 * attr->padding_ - 1) / attr->padding_ * attr->padding_; - GELOGD("Padding size %ld by %d. final size = %zu.", size, attr->padding_, allocate_size); - } mem_type = attr->mem_type_; } @@ -69,6 +62,18 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) { } else if (mem_type == HOST_DDR) { buffer = malloc(allocate_size); } else { + void *try_reuse_addr = nullptr; + int padding = kDefaultPadding; + if (attr != nullptr) { + try_reuse_addr = attr->try_reuse_addr_; + if (attr->padding_ > 0) { + padding = attr->padding_; + } + } + + // pad even when attr is null: round up to a multiple of padding, then add one extra padding + allocate_size = (size + 2 * padding - 1) / padding * padding; + GELOGD("Padding size %zu by %d. final size = %zu.", size, padding, allocate_size); buffer = MemManager::Instance() .CachingInstance(RT_MEMORY_HBM) .Malloc(allocate_size, reinterpret_cast(try_reuse_addr), device_id_);