@@ -173,10 +173,12 @@ set(TRAIN_SRC_LIST | |||||
"graph/manager/graph_manager_utils.cc" | "graph/manager/graph_manager_utils.cc" | ||||
"graph/manager/graph_mem_allocator.cc" | "graph/manager/graph_mem_allocator.cc" | ||||
"graph/manager/graph_caching_allocator.cc" | "graph/manager/graph_caching_allocator.cc" | ||||
"graph/manager/session_scope_mem_allocator.cc" | |||||
"graph/manager/graph_var_manager.cc" | "graph/manager/graph_var_manager.cc" | ||||
"graph/manager/host_mem_manager.cc" | "graph/manager/host_mem_manager.cc" | ||||
"graph/manager/rdma_pool_allocator.cc" | "graph/manager/rdma_pool_allocator.cc" | ||||
"graph/manager/host_mem_allocator.cc" | "graph/manager/host_mem_allocator.cc" | ||||
"graph/manager/graph_mem_manager.cc" | |||||
"graph/manager/memory_api.cc" | "graph/manager/memory_api.cc" | ||||
"graph/manager/model_manager/event_manager.cc" | "graph/manager/model_manager/event_manager.cc" | ||||
"graph/manager/trans_var_data_utils.cc" | "graph/manager/trans_var_data_utils.cc" | ||||
@@ -478,6 +480,8 @@ set(INFER_SRC_LIST | |||||
"graph/manager/host_mem_allocator.cc" | "graph/manager/host_mem_allocator.cc" | ||||
"graph/manager/graph_mem_allocator.cc" | "graph/manager/graph_mem_allocator.cc" | ||||
"graph/manager/graph_caching_allocator.cc" | "graph/manager/graph_caching_allocator.cc" | ||||
"graph/manager/session_scope_mem_allocator.cc" | |||||
"graph/manager/graph_mem_manager.cc" | |||||
"model/ge_model.cc" | "model/ge_model.cc" | ||||
"model/ge_root_model.cc" | "model/ge_root_model.cc" | ||||
"graph/common/transop_util.cc" | "graph/common/transop_util.cc" | ||||
@@ -28,6 +28,8 @@ set(SRC_LIST | |||||
"../graph/manager/graph_var_manager.cc" | "../graph/manager/graph_var_manager.cc" | ||||
"../graph/manager/graph_mem_allocator.cc" | "../graph/manager/graph_mem_allocator.cc" | ||||
"../graph/manager/graph_caching_allocator.cc" | "../graph/manager/graph_caching_allocator.cc" | ||||
"../graph/manager/session_scope_mem_allocator.cc" | |||||
"../graph/manager/graph_mem_manager.cc" | |||||
"../graph/manager/trans_var_data_utils.cc" | "../graph/manager/trans_var_data_utils.cc" | ||||
"../graph/manager/util/debug.cc" | "../graph/manager/util/debug.cc" | ||||
"../graph/manager/rdma_pool_allocator.cc" | "../graph/manager/rdma_pool_allocator.cc" | ||||
@@ -26,7 +26,7 @@ | |||||
#include "graph/execute/graph_execute.h" | #include "graph/execute/graph_execute.h" | ||||
#include "graph/load/graph_loader.h" | #include "graph/load/graph_loader.h" | ||||
#include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "single_op/single_op_manager.h" | #include "single_op/single_op_manager.h" | ||||
#include "graph/load/model_manager/davinci_model.h" | #include "graph/load/model_manager/davinci_model.h" | ||||
#include "opskernel_manager/ops_kernel_builder_manager.h" | #include "opskernel_manager/ops_kernel_builder_manager.h" | ||||
@@ -500,6 +500,7 @@ string MemoryBlock::String() { | |||||
ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; | ss << "Block size: " << Size() << " from " << HeadOffset() << " to " << TailOffset() << " "; | ||||
ss << "real_size_list: " << ToString(real_size_list_) << " "; | ss << "real_size_list: " << ToString(real_size_list_) << " "; | ||||
ss << "ref_count: " << ref_count_ << " "; | ss << "ref_count: " << ref_count_ << " "; | ||||
ss << "reuse_mem_: " << reuse_mem_ << " "; | |||||
ss << "members: "; | ss << "members: "; | ||||
for (auto x : NodeTypeIndexList()) { | for (auto x : NodeTypeIndexList()) { | ||||
ss << "__node: " << ToString(x) << " "; | ss << "__node: " << ToString(x) << " "; | ||||
@@ -513,8 +514,8 @@ string MemoryBlock::String() { | |||||
BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<string, string> &anchor_to_symbol, | BlockMemAssigner::BlockMemAssigner(ComputeGraphPtr compute_graph, const map<string, string> &anchor_to_symbol, | ||||
const map<string, list<NodeIndexIO>> &symbol_to_anchors) | const map<string, list<NodeIndexIO>> &symbol_to_anchors) | ||||
: mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), | |||||
symbol_to_anchors_(symbol_to_anchors), anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} | |||||
: compute_graph_(std::move(compute_graph)), symbol_to_anchors_(symbol_to_anchors), | |||||
anchor_to_symbol_(anchor_to_symbol), life_time_(0) {} | |||||
BlockMemAssigner::~BlockMemAssigner() { | BlockMemAssigner::~BlockMemAssigner() { | ||||
GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); | GELOGD("[Destruct][BlockMemAssigner]blocks_store_ size : %lu", blocks_store_.size()); | ||||
@@ -1123,7 +1124,7 @@ bool BlockMemAssigner::IsZeroCopyBlock(const NodePtr &node, bool continuous) { | |||||
MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, | ||||
OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | OpMemoryType mem_type, const NodePtr &n, uint32_t out_index, | ||||
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem, | ||||
const bool continuous, int64_t memory_type) { | |||||
const bool continuous, uint64_t memory_type) { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | ||||
n == nullptr, | n == nullptr, | ||||
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed"); | ||||
@@ -1824,8 +1825,8 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false); | zero_memory_list_.emplace_back(n, kWorkspace, static_cast<uint32_t>(i), false); | ||||
continue; | continue; | ||||
} | } | ||||
int64_t memory_type = RT_MEMORY_HBM; | |||||
if (!GetWorkSpaceMemoryType(n, i, memory_type)) { | |||||
uint64_t memory_type = RT_MEMORY_HBM; | |||||
if (!GetWorkSpaceMemoryType(n, i, memory_type, workspace_reuse_flag)) { | |||||
GELOGW("Get workspace memory type failed."); | GELOGW("Get workspace memory type failed."); | ||||
return; | return; | ||||
} | } | ||||
@@ -1860,7 +1861,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
} | } | ||||
void BlockMemAssigner::CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id, | void BlockMemAssigner::CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id, | ||||
MemoryBlock *mem_block, int64_t memory_type) { | |||||
MemoryBlock *mem_block, uint64_t memory_type) { | |||||
bool reuse_mem_flag = | bool reuse_mem_flag = | ||||
((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; | ((workspace_reuse_flag.size() > index) && (workspace_reuse_flag[index] == false)) ? false : true; | ||||
if (reuse_mem_flag) { | if (reuse_mem_flag) { | ||||
@@ -1992,24 +1993,29 @@ void BlockMemAssigner::ReuseBlocksByLifeTime(size_t range_size) { | |||||
} | } | ||||
} | } | ||||
void AddBlockMemOffset(size_t &mem_offset, size_t &p2p_mem_offset, MemoryBlock &block) { | |||||
if (block.memory_type_ == RT_MEMORY_HBM) { | |||||
if (block.first_continuous_block_) { | |||||
mem_offset += MEM_ALIGN_SIZE; | |||||
} | |||||
block.Resize(); | |||||
block.SetHeadOffset(mem_offset); | |||||
mem_offset += block.Size(); | |||||
block.SetTailOffset(mem_offset - 1); | |||||
} else if (block.memory_type_ == RT_MEMORY_P2P_DDR) { | |||||
if (block.first_continuous_block_) { | |||||
p2p_mem_offset += MEM_ALIGN_SIZE; | |||||
void AddBlockMemOffset(std::map<uint64_t, size_t> &mem_offsets, MemoryBlock &block) { | |||||
auto it = mem_offsets.find(block.memory_type_); | |||||
if (it == mem_offsets.end()) { | |||||
auto result = mem_offsets.insert(std::pair<int64_t, size_t>(block.memory_type_, 0)); | |||||
// Insert failure is unlikely | |||||
if (!result.second) { | |||||
return; | |||||
} | } | ||||
block.Resize(); | |||||
block.SetHeadOffset(p2p_mem_offset); | |||||
p2p_mem_offset += block.Size(); | |||||
block.SetTailOffset(p2p_mem_offset - 1); | |||||
it = result.first; | |||||
} | |||||
if (it == mem_offsets.end()) { | |||||
return; | |||||
} | |||||
auto &mem_offset = it->second; | |||||
if (block.first_continuous_block_) { | |||||
mem_offset += MEM_ALIGN_SIZE; | |||||
} | } | ||||
block.Resize(); | |||||
block.SetHeadOffset(mem_offset); | |||||
mem_offset += block.Size(); | |||||
block.SetTailOffset(mem_offset - 1); | |||||
} | } | ||||
bool DynamicBatchBlockReuse(MemoryBlock &block) { | bool DynamicBatchBlockReuse(MemoryBlock &block) { | ||||
@@ -2036,27 +2042,27 @@ void BlockMemAssigner::ResizeDynamicBatchBlocks() { | |||||
} | } | ||||
} | } | ||||
size_t max_mem_offset = mem_offset_; | |||||
size_t max_p2p_mem_offset = p2p_mem_offset_; | |||||
std::map<uint64_t, size_t> max_mem_offsets = mem_offsets_; | |||||
for (auto &batch_blocks : dynamic_batch_blocks) { | for (auto &batch_blocks : dynamic_batch_blocks) { | ||||
size_t mem_offset = mem_offset_; | |||||
size_t p2p_mem_offset = p2p_mem_offset_; | |||||
std::map<uint64_t, size_t> mem_offsets = mem_offsets_; | |||||
for (auto block : batch_blocks.second) { | for (auto block : batch_blocks.second) { | ||||
if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { | if (block == nullptr || block->deleted_block_ || block->is_zero_copy_) { | ||||
continue; | continue; | ||||
} | } | ||||
AddBlockMemOffset(mem_offset, p2p_mem_offset, *block); | |||||
AddBlockMemOffset(mem_offsets, *block); | |||||
} | } | ||||
if (mem_offset > max_mem_offset) { | |||||
max_mem_offset = mem_offset; | |||||
} | |||||
if (p2p_mem_offset > max_p2p_mem_offset) { | |||||
max_p2p_mem_offset = p2p_mem_offset; | |||||
for (auto &it : mem_offsets) { | |||||
auto itmax = max_mem_offsets.find(it.first); | |||||
if (itmax == max_mem_offsets.end()) { | |||||
max_mem_offsets[it.first] = it.second; | |||||
} else if (it.second > itmax->second) { | |||||
itmax->second = it.second; | |||||
} | |||||
GELOGI("Batch:%s memory type:%ld offset:%zu", batch_blocks.first.c_str(), it.first, it.second); | |||||
} | } | ||||
GELOGI("Batch[%s] offset[%zu] p2p_offset[%zu]", batch_blocks.first.c_str(), mem_offset, p2p_mem_offset); | |||||
} | } | ||||
mem_offset_ = max_mem_offset; | |||||
p2p_mem_offset_ = max_p2p_mem_offset; | |||||
mem_offsets_ = max_mem_offsets; | |||||
} | } | ||||
/// | /// | ||||
@@ -2074,11 +2080,13 @@ void BlockMemAssigner::ResizeMemoryBlocks() { | |||||
continue; | continue; | ||||
} | } | ||||
AddBlockMemOffset(mem_offset_, p2p_mem_offset_, *memory_block); | |||||
AddBlockMemOffset(mem_offsets_, *memory_block); | |||||
} | } | ||||
ResizeDynamicBatchBlocks(); | ResizeDynamicBatchBlocks(); | ||||
GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu," | |||||
"theory_min_memory_size %zu", mem_offset_, p2p_mem_offset_, theory_min_memory_size_); | |||||
for (auto it : mem_offsets_) { | |||||
GELOGI("Memory type:%ld mem_offset exclude zero_copy_memory:%zu, theory_min_memory_size:%zu", it.first, it.second, | |||||
theory_min_memory_size_); | |||||
} | |||||
} | } | ||||
/// | /// | ||||
@@ -2217,7 +2225,8 @@ bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | |||||
(node_type == CONSTANTOP) || (node_type == HVDWAIT); | (node_type == CONSTANTOP) || (node_type == HVDWAIT); | ||||
} | } | ||||
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | |||||
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, | |||||
vector<bool> &workspace_reuse_flag) { | |||||
memory_type = RT_MEMORY_HBM; | memory_type = RT_MEMORY_HBM; | ||||
vector<int64_t> workspace_memory_type; | vector<int64_t> workspace_memory_type; | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
@@ -2233,6 +2242,20 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, | |||||
return false; | return false; | ||||
} | } | ||||
memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM; | ||||
vector<int32_t> workspace_no_reuse_scope; | |||||
bool has_workspace_no_reuse_scope = | |||||
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
if (has_workspace_no_reuse_scope && (index < workspace_no_reuse_scope.size()) | |||||
&& (workspace_no_reuse_scope[index] == kSessionNoReuse)) { | |||||
memory_type |= kSessionScopeMemory; | |||||
if (workspace_reuse_flag.empty()) { | |||||
workspace_reuse_flag.assign(workspace_no_reuse_scope.size(), true); | |||||
} | |||||
// set to no reuse | |||||
workspace_reuse_flag[index] = false; | |||||
GELOGI("%s's workspace is session scope no reuse, memory type:%lu.", node->GetName().c_str(), memory_type); | |||||
} | |||||
return true; | return true; | ||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -34,6 +34,10 @@ | |||||
namespace ge { | namespace ge { | ||||
const size_t kMaxLifeTime = 0xffffffff; | const size_t kMaxLifeTime = 0xffffffff; | ||||
const int32_t kInvalidThreadScopeId = -1; | const int32_t kInvalidThreadScopeId = -1; | ||||
const uint64_t kSessionScopeMemory = 0x100000000; | |||||
const uint64_t kMemoryTypeMask = 0xffffffff; | |||||
enum MemoryNoReuseScope { kReuse, kSessionNoReuse, kGraphNoReuse }; | |||||
using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | ||||
@@ -224,9 +228,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
Status Assign() override; | Status Assign() override; | ||||
size_t GetMemOffset() const { return mem_offset_; } | |||||
size_t GetP2PMemOffset() const { return p2p_mem_offset_; } | |||||
const std::map<uint64_t, size_t> &GetMemOffsets() const { return mem_offsets_; } | |||||
int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } | int64_t GetAtomicAddrCleanId() const { return atomic_addr_clean_id_; } | ||||
@@ -329,14 +331,10 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// | /// | ||||
void UpdateOpTensorMemType(std::list<NodeIndexIO> node_index_io_list, int64_t memory_type); | void UpdateOpTensorMemType(std::list<NodeIndexIO> node_index_io_list, int64_t memory_type); | ||||
size_t mem_offset_; | |||||
size_t p2p_mem_offset_; | |||||
std::map<uint64_t, size_t> mem_offsets_; | |||||
ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
std::vector<MemoryBlock *> memory_blocks_; | std::vector<MemoryBlock *> memory_blocks_; | ||||
std::vector<MemoryBlock *> blocks_store_; | std::vector<MemoryBlock *> blocks_store_; | ||||
std::vector<NodeTypeIndex> zero_memory_list_; | std::vector<NodeTypeIndex> zero_memory_list_; | ||||
// ref mapping | // ref mapping | ||||
@@ -380,7 +378,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// | /// | ||||
MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, | MemoryBlock *ApplyMemory(size_t block_size, size_t real_size, size_t no_align_size, OpMemoryType mem_type, | ||||
const ge::NodePtr &n, uint32_t out_index, const std::vector<bool> &workspace_reuse_flag, | const ge::NodePtr &n, uint32_t out_index, const std::vector<bool> &workspace_reuse_flag, | ||||
const bool is_op_reuse_mem, const bool continuous, int64_t memory_type); | |||||
const bool is_op_reuse_mem, const bool continuous, uint64_t memory_type); | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
@@ -394,7 +392,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
/// @author | /// @author | ||||
/// | /// | ||||
void CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id, | void CheckWorkspaceReuse(const vector<bool> &workspace_reuse_flag, uint32_t index, int64_t stream_id, | ||||
MemoryBlock *mem_block, int64_t memory_type); | |||||
MemoryBlock *mem_block, uint64_t memory_type); | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
@@ -457,7 +455,8 @@ class BlockMemAssigner : public MemAssigner { | |||||
bool IsContinuousOutput(const NodePtr &n); | bool IsContinuousOutput(const NodePtr &n); | ||||
bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | |||||
bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, uint64_t &memory_type, | |||||
vector<bool> &workspace_reuse_flag); | |||||
void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); | void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); | ||||
@@ -107,11 +107,22 @@ Status GraphMemoryAssigner::AssignMemory() { | |||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | ||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | |||||
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | |||||
if (mem_assigner->GetP2PMemOffset() >= 0) { | |||||
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); | |||||
for (auto pair : mem_assigner->GetMemOffsets()) { | |||||
MemoryOffset offset(pair.first, pair.second); | |||||
memory_offset_.emplace(pair.first, offset); | |||||
} | |||||
// base memtype offset must be exist | |||||
auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it == mem_assigner->GetMemOffsets().end()) { | |||||
MemoryOffset memory_offset(RT_MEMORY_HBM, 0); | |||||
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | |||||
} | |||||
it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR); | |||||
if (it == mem_assigner->GetMemOffsets().end()) { | |||||
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0); | |||||
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); | memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); | ||||
} | } | ||||
@@ -224,7 +235,7 @@ ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &out | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) { | |||||
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset) { | |||||
if (memory_offset_.empty()) { | if (memory_offset_.empty()) { | ||||
REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", | REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s", | ||||
compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | ||||
@@ -264,7 +275,7 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) { | |||||
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) { | |||||
BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); | BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger()); | ||||
if (priority_assigner == nullptr) { | if (priority_assigner == nullptr) { | ||||
REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", | REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s", | ||||
@@ -1398,6 +1409,9 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { | |||||
"graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str()); | ||||
} | } | ||||
for (auto pair : memory_offset_) { | for (auto pair : memory_offset_) { | ||||
if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) { | |||||
continue; | |||||
} | |||||
GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), | GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), | ||||
pair.second.mem_offset_, pair.first); | pair.second.mem_offset_, pair.first); | ||||
} | } | ||||
@@ -103,9 +103,9 @@ class GraphMemoryAssigner { | |||||
ge::Status AssignMemory2HasRefAttrNode(); | ge::Status AssignMemory2HasRefAttrNode(); | ||||
ge::Status ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset); | |||||
ge::Status ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset); | |||||
ge::Status AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size); | |||||
ge::Status AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size); | |||||
ge::Status SetInputOffset(); | ge::Status SetInputOffset(); | ||||
@@ -23,7 +23,7 @@ | |||||
namespace ge { | namespace ge { | ||||
HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) | HybridMemAssigner::HybridMemAssigner(ge::ComputeGraphPtr compute_graph) | ||||
: mem_offset_(0), p2p_mem_offset_(0), compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} | |||||
: compute_graph_(std::move(compute_graph)), priority_assigner_(nullptr) {} | |||||
Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size) { | Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size) { | ||||
vector<int64_t> ranges; | vector<int64_t> ranges; | ||||
@@ -36,7 +36,10 @@ Status HybridMemAssigner::AssignMemory(std::unique_ptr<BlockMemAssigner> &block_ | |||||
block_assigner->AssignMemoryWithReuse(ranges); | block_assigner->AssignMemoryWithReuse(ranges); | ||||
mem_size = block_assigner->GetMemOffset(); | |||||
// total size | |||||
for (auto it : block_assigner->GetMemOffsets()) { | |||||
mem_size += it.second; | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -73,8 +76,7 @@ Status HybridMemAssigner::Assign() { | |||||
} | } | ||||
priority_assigner->SetOpMemOffset(false); | priority_assigner->SetOpMemOffset(false); | ||||
mem_offset_ = priority_assigner->GetMemOffset(); | |||||
p2p_mem_offset_ = priority_assigner->GetP2PMemOffset(); | |||||
mem_offsets_ = priority_assigner->GetMemOffsets(); | |||||
priority_assigner_ = std::move(priority_assigner); | priority_assigner_ = std::move(priority_assigner); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -42,16 +42,14 @@ class HybridMemAssigner : public MemAssigner { | |||||
Status Assign() override; | Status Assign() override; | ||||
size_t GetMemOffset() const { return mem_offset_; } | |||||
size_t GetP2PMemOffset() const { return p2p_mem_offset_; } | |||||
const std::map<uint64_t, size_t> &GetMemOffsets() const { return mem_offsets_; } | |||||
BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } | BlockMemAssignerPtr GetPriorityAssinger() const { return priority_assigner_; } | ||||
private: | private: | ||||
Status AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size); | Status AssignMemory(std::unique_ptr<BlockMemAssigner> &block_assigner, size_t &mem_size); | ||||
size_t mem_offset_; | |||||
size_t p2p_mem_offset_; | |||||
std::map<uint64_t, size_t> mem_offsets_; | |||||
ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
@@ -20,7 +20,7 @@ | |||||
#include "graph/build/memory/graph_mem_assigner.h" | #include "graph/build/memory/graph_mem_assigner.h" | ||||
namespace ge { | namespace ge { | ||||
Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_offset, size_t &zero_copy_mem_size) { | |||||
Status MemoryAssigner::AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size) { | |||||
GraphMemoryAssigner graph_mem_assigner(compute_graph_); | GraphMemoryAssigner graph_mem_assigner(compute_graph_); | ||||
if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { | if (graph_mem_assigner.AssignMemory() != ge::SUCCESS) { | ||||
@@ -47,6 +47,7 @@ | |||||
#include "omg/version.h" | #include "omg/version.h" | ||||
#include "register/op_registry.h" | #include "register/op_registry.h" | ||||
#include "graph/passes/set_input_output_offset_pass.h" | #include "graph/passes/set_input_output_offset_pass.h" | ||||
#include "graph/build/memory/block_mem_assigner.h" | |||||
using std::map; | using std::map; | ||||
using std::set; | using std::set; | ||||
@@ -398,9 +399,21 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); | REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); | ||||
GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); | GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_MEMORY_SIZE.c_str()); | ||||
return FAILED); | return FAILED); | ||||
auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); | |||||
size_t session_scope_mem_offset = 0; | |||||
auto it = mem_type_to_mem_offset_.find(mem_type_session_scope); | |||||
if (it != mem_type_to_mem_offset_.end()) { | |||||
session_scope_mem_offset = it->second; | |||||
} | |||||
if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { | if (mem_type_to_mem_offset_.find(RT_MEMORY_P2P_DDR) != mem_type_to_mem_offset_.end()) { | ||||
p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; | p2p_mem_offset_ = mem_type_to_mem_offset_[RT_MEMORY_P2P_DDR]; | ||||
} | } | ||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset), | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", | |||||
ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE.c_str()); | |||||
GELOGE(FAILED, "SetInt of ATTR_NAME_SESSION_SCOPE_MEMORY_SIZE failed."); | |||||
return FAILED); | |||||
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), | GE_CHK_BOOL_EXEC(ge::AttrUtils::SetInt(&model, ATTR_MODEL_P2P_MEMORY_SIZE, p2p_mem_offset_), | ||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); | REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); | ||||
GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); | GELOGE(FAILED, "[Set][Attr] %s in model failed", ATTR_MODEL_P2P_MEMORY_SIZE.c_str()); | ||||
@@ -434,8 +447,8 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||||
REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); | REPORT_INNER_ERROR("E19999", "Set Attr:%s in model failed", ATTR_MODEL_OUT_NODES_NAME.c_str()); | ||||
GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); | GELOGE(FAILED, "[Set][Str] %s in model failed.", ATTR_MODEL_OUT_NODES_NAME.c_str()); | ||||
return FAILED); | return FAILED); | ||||
GELOGI("For model, max_mem_offset_: %zu, p2p_mem_size: %zu, zero_copy_mem_size_: %zu", max_mem_offset_, | |||||
p2p_mem_offset_, zero_copy_mem_size_); | |||||
GELOGI("For model, max_mem_offset: %zu, p2p_mem_size: %zu, zero_copy_mem_size: %zu, session_scope_mem_size: %zu", | |||||
max_mem_offset_, p2p_mem_offset_, zero_copy_mem_size_, session_scope_mem_offset); | |||||
string fp_ceiling_mode; | string fp_ceiling_mode; | ||||
if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { | if (ge::GetContext().GetOption("ge.fpCeilingMode", fp_ceiling_mode) == SUCCESS) { | ||||
if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | if (!ge::AttrUtils::SetStr(&model, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | ||||
@@ -93,7 +93,7 @@ class ModelBuilder { | |||||
uint64_t session_id_; | uint64_t session_id_; | ||||
map<int64_t, size_t> mem_type_to_mem_offset_; | |||||
map<uint64_t, size_t> mem_type_to_mem_offset_; | |||||
size_t weight_offset_; | size_t weight_offset_; | ||||
@@ -40,7 +40,7 @@ | |||||
#include "graph/load/model_manager/cpu_queue_schedule.h" | #include "graph/load/model_manager/cpu_queue_schedule.h" | ||||
#include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
#include "graph/load/model_manager/tbe_handle_store.h" | #include "graph/load/model_manager/tbe_handle_store.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
#include "graph/manager/util/debug.h" | #include "graph/manager/util/debug.h" | ||||
@@ -60,6 +60,8 @@ | |||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "common/formats/utils/formats_trans_utils.h" | #include "common/formats/utils/formats_trans_utils.h" | ||||
#include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
#include "graph/build/memory/block_mem_assigner.h" | |||||
#include "graph/manager/session_scope_mem_allocator.h" | |||||
// create std::thread, catch exceptions using try/catch | // create std::thread, catch exceptions using try/catch | ||||
#define CREATE_STD_THREAD(thread_id, func, args) \ | #define CREATE_STD_THREAD(thread_id, func, args) \ | ||||
@@ -168,7 +170,6 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener | |||||
mem_base_(nullptr), | mem_base_(nullptr), | ||||
is_inner_mem_base_(false), | is_inner_mem_base_(false), | ||||
is_inner_weight_base_(false), | is_inner_weight_base_(false), | ||||
is_inner_p2p_mem_base_(false), | |||||
data_inputer_(nullptr), | data_inputer_(nullptr), | ||||
load_begin_time_(0), | load_begin_time_(0), | ||||
load_end_time_(0), | load_end_time_(0), | ||||
@@ -236,7 +237,7 @@ DavinciModel::~DavinciModel() { | |||||
FreeFeatureMapMem(); | FreeFeatureMapMem(); | ||||
FreeP2PMem(); | |||||
FreeExMem(); | |||||
OpDebugUnRegister(); | OpDebugUnRegister(); | ||||
@@ -389,7 +390,6 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
is_feature_map_mem_has_inited_ = true; | is_feature_map_mem_has_inited_ = true; | ||||
std::size_t data_size = TotalMemSize(); | std::size_t data_size = TotalMemSize(); | ||||
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | |||||
if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | ||||
REPORT_INNER_ERROR("E19999", "Param dev_ptr is nullptr or mem_size:%zu < ge_model.mem_size:%zu, " | REPORT_INNER_ERROR("E19999", "Param dev_ptr is nullptr or mem_size:%zu < ge_model.mem_size:%zu, " | ||||
@@ -400,7 +400,6 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
} | } | ||||
mem_base_ = static_cast<uint8_t *>(dev_ptr); | mem_base_ = static_cast<uint8_t *>(dev_ptr); | ||||
p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr); | |||||
is_inner_mem_base_ = false; | is_inner_mem_base_ = false; | ||||
if (TotalMemSize() && mem_base_ == nullptr) { | if (TotalMemSize() && mem_base_ == nullptr) { | ||||
@@ -422,24 +421,13 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
is_inner_mem_base_ = true; | is_inner_mem_base_ = true; | ||||
} | } | ||||
if (p2p_data_size != 0) { | |||||
p2p_mem_base_ = MallocP2PMem(p2p_data_size); | |||||
if (p2p_mem_base_ == nullptr) { | |||||
REPORT_CALL_ERROR("E19999", "MallocFeatureMapMem fail, p2p_data_size:%zu, model_id:%u, check invalid", | |||||
p2p_data_size, model_id_); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Alloc][Memory] for p2p failed, size:%zu, model_id:%u", | |||||
p2p_data_size, model_id_); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | |||||
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||||
p2p_mem_base_, p2p_data_size); | |||||
is_inner_p2p_mem_base_ = true; | |||||
if (!runtime_param_.memory_infos.empty()) { | |||||
GE_CHK_STATUS_RET(MallocExMem(), "MallocExMem failed."); | |||||
} | } | ||||
GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); | GE_CHK_STATUS_RET(InitVariableMem(), "[Init][VariableMemory] failed, model_id:%u", model_id_); | ||||
runtime_param_.mem_base = mem_base_; | runtime_param_.mem_base = mem_base_; | ||||
runtime_param_.weight_base = weights_mem_base_; | runtime_param_.weight_base = weights_mem_base_; | ||||
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR].memory_base = p2p_mem_base_; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -465,7 +453,6 @@ Status DavinciModel::InitVariableMem() { | |||||
void DavinciModel::InitRuntimeParams() { | void DavinciModel::InitRuntimeParams() { | ||||
int64_t value = 0; | int64_t value = 0; | ||||
bool ret; | bool ret; | ||||
MemInfo p2p_mem_info; | |||||
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); | ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_MEMORY_SIZE, value); | ||||
runtime_param_.mem_size = ret ? (uint64_t)value : 0; | runtime_param_.mem_size = ret ? (uint64_t)value : 0; | ||||
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); | ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_WEIGHT_SIZE, value); | ||||
@@ -490,16 +477,18 @@ void DavinciModel::InitRuntimeParams() { | |||||
runtime_param_.var_size = ret ? (uint64_t)value : 0; | runtime_param_.var_size = ret ? (uint64_t)value : 0; | ||||
session_id_ = runtime_param_.session_id; | session_id_ = runtime_param_.session_id; | ||||
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); | ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_P2P_MEMORY_SIZE, value); | ||||
p2p_mem_info.memory_size = ret ? (uint64_t)value : 0; | |||||
MemInfo p2p_mem_info; | |||||
p2p_mem_info.memory_size = static_cast<size_t>(ret ? value : 0); | |||||
p2p_mem_info.memory_type = RT_MEMORY_P2P_DDR; | |||||
p2p_mem_info.memory_key = "_p"; | |||||
runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); | runtime_param_.memory_infos[RT_MEMORY_P2P_DDR] = std::move(p2p_mem_info); | ||||
GELOGI( | |||||
"InitRuntimeParams(), session_id:%lu, stream_num:%u, event_num:%u, label_num:%u, " | |||||
"logic_mem_base:0x%lx, logic_weight_base:0x%lx, logic_var_base:0x%lx, " | |||||
"memory_size:%lu, weight_size:%lu, var_size:%lu", | |||||
runtime_param_.session_id, runtime_param_.stream_num, runtime_param_.event_num, runtime_param_.label_num, | |||||
runtime_param_.logic_mem_base, runtime_param_.logic_weight_base, runtime_param_.logic_var_base, | |||||
runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | |||||
ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, value); | |||||
MemInfo session_scope_mem_info; | |||||
session_scope_mem_info.memory_size = static_cast<size_t>(ret ? value : 0); | |||||
runtime_param_.memory_infos[kSessionScopeMemory | RT_MEMORY_HBM] = std::move(session_scope_mem_info); | |||||
GELOGI("InitRuntimeParams(), %s.", runtime_param_.ToString().c_str()); | |||||
} | } | ||||
void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { | void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { | ||||
@@ -4089,14 +4078,15 @@ Status DavinciModel::InitEntryTask() { | |||||
uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | ||||
uint8_t *mem_base = nullptr; | uint8_t *mem_base = nullptr; | ||||
const string purpose("feature map,used for op input and output."); | const string purpose("feature map,used for op input and output."); | ||||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | ||||
if (res == EN_OK) { | if (res == EN_OK) { | ||||
data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); | data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); | ||||
string memory_key = std::to_string(0) + "_f"; | string memory_key = std::to_string(0) + "_f"; | ||||
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); | |||||
mem_base = | |||||
MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, data_size, GetDeviceId()); | |||||
} else { | } else { | ||||
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, data_size, GetDeviceId()); | |||||
mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, data_size, GetDeviceId()); | |||||
} | } | ||||
if (mem_base != nullptr) { | if (mem_base != nullptr) { | ||||
@@ -4105,83 +4095,119 @@ uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | |||||
return mem_base; | return mem_base; | ||||
} | } | ||||
uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { | |||||
uint8_t *p2p_mem_base = nullptr; | |||||
const string purpose("p2p memory, used for some op related to hcom"); | |||||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
string p2p_memory_key = std::to_string(0) + "_p"; | |||||
p2p_mem_base = | |||||
MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_memory_key, p2p_data_size, GetDeviceId()); | |||||
} else { | |||||
p2p_mem_base = MemManager::Instance(RT_MEMORY_P2P_DDR)->MallocMemory(purpose, p2p_data_size, GetDeviceId()); | |||||
Status DavinciModel::MallocExMem() { | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
for (auto it : runtime_param_.memory_infos) { | |||||
auto mem_size = it.second.memory_size; | |||||
if (mem_size == 0) { | |||||
continue; | |||||
} | |||||
bool sessoion_scope = ((kSessionScopeMemory & it.first) == kSessionScopeMemory); | |||||
auto mem_type = it.first & kMemoryTypeMask; | |||||
uint8_t *mem_base = nullptr; | |||||
const string purpose("p2p memory, used for some op related to hcom or session scope memory"); | |||||
if (sessoion_scope) { | |||||
mem_base = MemManager::Instance().SessionScopeMemInstance(mem_type).Malloc(mem_size, runtime_param_.session_id); | |||||
} else if (res_static_memory == EN_OK) { | |||||
string memory_key = std::to_string(0) + it.second.memory_key; | |||||
mem_base = | |||||
MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, memory_key, mem_size, GetDeviceId()); | |||||
} else { | |||||
mem_base = MemManager::Instance().MemInstance(mem_type).MallocMemory(purpose, mem_size, GetDeviceId()); | |||||
} | |||||
if (mem_base == nullptr) { | |||||
REPORT_CALL_ERROR("E19999", "MallocExMem fail, type:%ld size:%zu, model_id:%u, check invalid", | |||||
mem_type, mem_size, model_id_); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc ex memory failed, type:%ld size: %zu", mem_type, mem_size); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | |||||
it.second.memory_base = mem_base; | |||||
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] mem_type[%ld] mem_addr[%p] mem_size[%zu]", | |||||
runtime_param_.graph_id, mem_type, mem_base, mem_size); | |||||
} | } | ||||
return p2p_mem_base; | |||||
return SUCCESS; | |||||
} | } | ||||
uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | ||||
uint8_t *weights_mem_base = nullptr; | uint8_t *weights_mem_base = nullptr; | ||||
const string purpose("weights memory in inference network."); | const string purpose("weights memory in inference network."); | ||||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | ||||
if (res == EN_OK) { | if (res == EN_OK) { | ||||
string weight_memory_key = std::to_string(0) + "_w"; | string weight_memory_key = std::to_string(0) + "_w"; | ||||
weights_mem_base = | |||||
MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); | |||||
weights_mem_base = MemManager::Instance() | |||||
.MemInstance(RT_MEMORY_HBM) | |||||
.MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); | |||||
} else { | } else { | ||||
weights_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weights_size, GetDeviceId()); | |||||
weights_mem_base = | |||||
MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, weights_size, GetDeviceId()); | |||||
} | } | ||||
return weights_mem_base; | return weights_mem_base; | ||||
} | } | ||||
void DavinciModel::FreeFeatureMapMem() { | void DavinciModel::FreeFeatureMapMem() { | ||||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | ||||
if (res == EN_OK && is_inner_mem_base_) { | if (res == EN_OK && is_inner_mem_base_) { | ||||
string weight_memory_key = std::to_string(0) + "_f"; | string weight_memory_key = std::to_string(0) + "_f"; | ||||
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(weight_memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weight_memory_key, GetDeviceId()), | |||||
"failed to free weight memory"); | |||||
} | } | ||||
mem_base_ = nullptr; | mem_base_ = nullptr; | ||||
} else { | } else { | ||||
GE_IF_BOOL_EXEC(mem_base_ != nullptr && is_inner_mem_base_, | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(mem_base_, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
mem_base_ = nullptr); | |||||
GE_IF_BOOL_EXEC( | |||||
mem_base_ != nullptr && is_inner_mem_base_, | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(mem_base_, GetDeviceId()), | |||||
"failed to free feature_map memory"); | |||||
mem_base_ = nullptr); | |||||
} | } | ||||
} | } | ||||
void DavinciModel::FreeP2PMem() { | |||||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||||
std::string p2p_memory_key = std::to_string(0) + "_p"; | |||||
if (MemManager::Instance(RT_MEMORY_P2P_DDR)->GetMemoryAddr(p2p_memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_memory_key, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
void DavinciModel::FreeExMem() { | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res_static_memory = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||||
for (auto it : runtime_param_.memory_infos) { | |||||
// free when session destory | |||||
if ((kSessionScopeMemory & it.first) == kSessionScopeMemory) { | |||||
continue; | |||||
} | |||||
auto mem_type = it.first & kMemoryTypeMask; | |||||
if (res_static_memory == EN_OK) { | |||||
std::string memory_key = std::to_string(0) + it.second.memory_key; | |||||
if (MemManager::Instance().MemInstance(mem_type).GetMemoryAddr(memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(memory_key, GetDeviceId()), | |||||
"failed to free memory"); | |||||
} | |||||
it.second.memory_base = nullptr; | |||||
} else { | |||||
GE_IF_BOOL_EXEC( | |||||
it.second.memory_base != nullptr, | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(mem_type).FreeMemory(it.second.memory_base, GetDeviceId()), | |||||
"failed to free memory"); | |||||
it.second.memory_base = nullptr); | |||||
} | } | ||||
p2p_mem_base_ = nullptr; | |||||
} else { | |||||
GE_IF_BOOL_EXEC(p2p_mem_base_ != nullptr && is_inner_mem_base_, | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_P2P_DDR)->FreeMemory(p2p_mem_base_, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
p2p_mem_base_ = nullptr); | |||||
} | } | ||||
} | } | ||||
void DavinciModel::FreeWeightsMem() { | void DavinciModel::FreeWeightsMem() { | ||||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
char ge_static_mem_env[MMPA_MAX_PATH] = {0x00}; | |||||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | ||||
if (res == EN_OK) { | if (res == EN_OK) { | ||||
string memory_key = std::to_string(0) + "_w"; | string memory_key = std::to_string(0) + "_w"; | ||||
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
if (MemManager::Instance().MemInstance(RT_MEMORY_HBM).GetMemoryAddr(memory_key) != nullptr) { | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key, GetDeviceId()), | |||||
"failed to free feature_map memory"); | |||||
} | } | ||||
weights_mem_base_ = nullptr; | weights_mem_base_ = nullptr; | ||||
} else { | } else { | ||||
GE_IF_BOOL_EXEC(weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, | |||||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weights_mem_base_, GetDeviceId()), | |||||
"[Free][Memory] failed, model_id:%u", model_id_); | |||||
weights_mem_base_ = nullptr); | |||||
GE_IF_BOOL_EXEC( | |||||
weights_mem_base_ != nullptr && weights_mem_base_ != mem_base_ && is_inner_weight_base_, | |||||
GE_CHK_STATUS(MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(weights_mem_base_, GetDeviceId()), | |||||
"failed to free weight memory"); | |||||
weights_mem_base_ = nullptr); | |||||
} | } | ||||
} | } | ||||
@@ -248,8 +248,6 @@ class DavinciModel { | |||||
// get total mem size | // get total mem size | ||||
size_t TotalMemSize() const { return runtime_param_.mem_size; } | size_t TotalMemSize() const { return runtime_param_.mem_size; } | ||||
const map<uint32_t, MemInfo> &P2PMemInfos() const { return runtime_param_.memory_infos; } | |||||
// model name | // model name | ||||
string Name() const { return name_; } | string Name() const { return name_; } | ||||
@@ -586,10 +584,8 @@ class DavinciModel { | |||||
// memory address of model | // memory address of model | ||||
uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. | uintptr_t fixed_mem_base_; // Initial of mem_base_, keep forever. | ||||
uint8_t *mem_base_; | uint8_t *mem_base_; | ||||
uint8_t *p2p_mem_base_; | |||||
bool is_inner_mem_base_; | bool is_inner_mem_base_; | ||||
bool is_inner_weight_base_; | bool is_inner_weight_base_; | ||||
bool is_inner_p2p_mem_base_; | |||||
// input data manager | // input data manager | ||||
DataInputer *data_inputer_; | DataInputer *data_inputer_; | ||||
int64_t load_begin_time_; | int64_t load_begin_time_; | ||||
@@ -668,13 +664,13 @@ class DavinciModel { | |||||
uint8_t *MallocWeightsMem(size_t weights_size); | uint8_t *MallocWeightsMem(size_t weights_size); | ||||
uint8_t *MallocP2PMem(size_t p2p_data_size); | |||||
Status MallocExMem(); | |||||
void FreeFeatureMapMem(); | void FreeFeatureMapMem(); | ||||
void FreeWeightsMem(); | void FreeWeightsMem(); | ||||
void FreeP2PMem(); | |||||
void FreeExMem(); | |||||
void ReleaseTask(); | void ReleaseTask(); | ||||
@@ -21,6 +21,7 @@ | |||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/types.h" | #include "graph/types.h" | ||||
#include "graph/build/memory/block_mem_assigner.h" | |||||
#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | ||||
do { \ | do { \ | ||||
@@ -514,10 +515,16 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); | bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, v_memory_type); | ||||
bool has_mem_type_workspace = | bool has_mem_type_workspace = | ||||
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); | ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_TYPE_LIST, workspace_memory_type); | ||||
vector<int32_t> workspace_no_reuse_scope; | |||||
bool has_workspace_no_reuse_scope = | |||||
ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { | for (size_t i = 0; i < v_workspace_bytes.size(); ++i) { | ||||
// Temporary solution, the aicpu workspace of multiple images cannot be shared. | // Temporary solution, the aicpu workspace of multiple images cannot be shared. | ||||
if (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && | |||||
!model_param.is_single_op) { | |||||
bool aicpu_work_space = (has_workspace_reuse && i < workspace_reuse_flag.size() && !workspace_reuse_flag[i] && | |||||
!model_param.is_single_op); | |||||
if (aicpu_work_space) { | |||||
void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); | void *mem_addr = model_param.aicpu_mem_mall->Acquire(v_workspace_offset[i], v_workspace_bytes[i]); | ||||
v_workspace_data_addr.push_back(mem_addr); | v_workspace_data_addr.push_back(mem_addr); | ||||
GELOGI( | GELOGI( | ||||
@@ -548,7 +555,13 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); | model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i]); | ||||
} else { | } else { | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); | VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_workspace_offset[i]); | ||||
uint8_t *mem_addr = model_param.mem_base + v_workspace_offset[i]; | |||||
uint8_t *mem_addr = nullptr; | |||||
bool session_scope_memory = (has_workspace_no_reuse_scope) && (i < workspace_no_reuse_scope.size()); | |||||
if (session_scope_memory) { | |||||
mem_addr = model_param.memory_infos.at(kSessionScopeMemory | RT_MEMORY_HBM).memory_base + v_workspace_offset[i]; | |||||
} else { | |||||
mem_addr = model_param.mem_base + v_workspace_offset[i]; | |||||
} | |||||
v_workspace_data_addr.push_back(mem_addr); | v_workspace_data_addr.push_back(mem_addr); | ||||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", | GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[F] name[%s] workspace[%zu] offset[%ld] bytes[%ld] memaddr[%p]", | ||||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], | model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i], v_workspace_bytes[i], | ||||
@@ -18,6 +18,7 @@ | |||||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ | #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_TASK_INFO_H_ | ||||
#include <vector> | #include <vector> | ||||
#include <sstream> | |||||
#include "cce/customize.h" | #include "cce/customize.h" | ||||
#include "framework/common/taskdown_common.h" | #include "framework/common/taskdown_common.h" | ||||
@@ -28,9 +29,11 @@ | |||||
namespace ge { | namespace ge { | ||||
struct MemInfo { | struct MemInfo { | ||||
uint64_t memory_size = 0; | |||||
size_t memory_size = 0; | |||||
uint64_t logic_memory_base = 0; | uint64_t logic_memory_base = 0; | ||||
uint8_t *memory_base = nullptr; | uint8_t *memory_base = nullptr; | ||||
uint32_t memory_type = RT_MEMORY_HBM; | |||||
std::string memory_key = ""; | |||||
}; | }; | ||||
struct RuntimeParam { | struct RuntimeParam { | ||||
@@ -40,6 +43,19 @@ struct RuntimeParam { | |||||
} | } | ||||
~RuntimeParam() = default; | ~RuntimeParam() = default; | ||||
std::string ToString() { | |||||
std::stringstream ss; | |||||
ss << "session_id:" << session_id << ", stream_num:" << stream_num << ", event_num:" << event_num | |||||
<< ", label_num:" << label_num << ", logic_mem_base:" << logic_mem_base | |||||
<< ", logic_weight_base:" << logic_weight_base << ", logic_var_base:" << logic_var_base | |||||
<< ", memory_size:" << mem_size << ", weight_size:" << weight_size << ", var_size:" << var_size | |||||
<< ", ex_memory_info:"; | |||||
for (auto it : memory_infos) { | |||||
ss << "[memory_type:" << it.first << ", memory_size:" << it.second.memory_size << "]"; | |||||
} | |||||
return ss.str(); | |||||
} | |||||
uint64_t mem_size = 0; | uint64_t mem_size = 0; | ||||
uint64_t logic_mem_base = 0; | uint64_t logic_mem_base = 0; | ||||
uint8_t *mem_base = nullptr; | uint8_t *mem_base = nullptr; | ||||
@@ -49,7 +65,7 @@ struct RuntimeParam { | |||||
uint64_t var_size = 0; | uint64_t var_size = 0; | ||||
uint64_t logic_var_base = 0; | uint64_t logic_var_base = 0; | ||||
uint8_t *var_base = nullptr; | uint8_t *var_base = nullptr; | ||||
std::map<uint32_t, MemInfo> memory_infos; | |||||
std::map<uint64_t, MemInfo> memory_infos; | |||||
uint32_t batch_num = 0; | uint32_t batch_num = 0; | ||||
uint32_t stream_num = 0; | uint32_t stream_num = 0; | ||||
uint32_t event_num = 0; | uint32_t event_num = 0; | ||||
@@ -21,7 +21,7 @@ | |||||
#include <utility> | #include <utility> | ||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace ge { | namespace ge { | ||||
const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, | const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, | ||||
@@ -117,7 +117,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||||
} | } | ||||
free_block_bins_[i] = bin_ptr; | free_block_bins_[i] = bin_ptr; | ||||
} | } | ||||
memory_allocator_ = MemManager::Instance(memory_type_); | |||||
memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); | |||||
if (memory_allocator_ == nullptr) { | if (memory_allocator_ == nullptr) { | ||||
return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
} | } | ||||
@@ -88,8 +88,8 @@ class CachingAllocator { | |||||
/// | /// | ||||
/// @ingroup ge_graph | /// @ingroup ge_graph | ||||
/// @brief free memory | /// @brief free memory | ||||
/// @param [in] memory_ptr memory address ptr | |||||
/// @param [in] device_id device id | /// @param [in] device_id device id | ||||
/// @param [out] memory_ptr memory address ptr | |||||
/// @return Status result of function | /// @return Status result of function | ||||
/// | /// | ||||
Status Free(uint8_t *memory_addr, uint32_t device_id = 0); | Status Free(uint8_t *memory_addr, uint32_t device_id = 0); | ||||
@@ -17,11 +17,9 @@ | |||||
#include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
#include <string> | #include <string> | ||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/rdma_pool_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h" | |||||
namespace ge { | namespace ge { | ||||
void MemoryAllocator::Initialize(uint32_t device_id) { | |||||
Status MemoryAllocator::Initialize(uint32_t device_id) { | |||||
GELOGI("MemoryAllocator::Initialize"); | GELOGI("MemoryAllocator::Initialize"); | ||||
// when redo Initialize free memory | // when redo Initialize free memory | ||||
@@ -31,6 +29,7 @@ void MemoryAllocator::Initialize(uint32_t device_id) { | |||||
} | } | ||||
} | } | ||||
memory_base_map_.clear(); | memory_base_map_.clear(); | ||||
return SUCCESS; | |||||
} | } | ||||
void MemoryAllocator::Finalize(uint32_t device_id) { | void MemoryAllocator::Finalize(uint32_t device_id) { | ||||
@@ -152,113 +151,4 @@ uint8_t *MemoryAllocator::GetMemoryAddr(const string &memory_key, uint32_t devic | |||||
return it->second.memory_addr_; | return it->second.memory_addr_; | ||||
} | } | ||||
MemManager::MemManager() {} | |||||
MemManager::~MemManager() { Finalize(); } | |||||
MemManager &MemManager::Instance() { | |||||
static MemManager mem_manager; | |||||
return mem_manager; | |||||
} | |||||
MemoryAllocator *MemManager::Instance(rtMemType_t memory_type) { return Instance().GetMemoryAllocator(memory_type); } | |||||
Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) { | |||||
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); | |||||
MemoryAllocator *memory_allocator = nullptr; | |||||
for (unsigned int index : memory_type) { | |||||
auto it = memory_allocator_map_.find(index); | |||||
if (it == memory_allocator_map_.end()) { | |||||
memory_allocator = new (std::nothrow) MemoryAllocator(index); | |||||
if (memory_allocator != nullptr) { | |||||
memory_allocator_map_[index] = memory_allocator; | |||||
GELOGI("Create MemoryAllocator memory type[%u] success.", index); | |||||
} else { | |||||
REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); | |||||
} | |||||
} else { | |||||
memory_allocator = it->second; | |||||
} | |||||
if (memory_allocator == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} else { | |||||
memory_allocator->Initialize(0); | |||||
} | |||||
} | |||||
auto ret = InitAllocator(memory_type, caching_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create CachingAllocator failed."); | |||||
return ret; | |||||
} | |||||
ret = InitAllocator(memory_type, rdma_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create RdmaAllocator failed."); | |||||
return ret; | |||||
} | |||||
ret = InitAllocator(memory_type, host_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create HostMemAllocator failed."); | |||||
return ret; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
template <typename T> | |||||
void FinalizeAllocatorMap(std::map<rtMemType_t, T *> &allocate_map) { | |||||
for (auto &allocator : allocate_map) { | |||||
if (allocator.second != nullptr) { | |||||
allocator.second->Finalize(); | |||||
delete allocator.second; | |||||
allocator.second = nullptr; | |||||
} | |||||
} | |||||
allocate_map.clear(); | |||||
} | |||||
void MemManager::Finalize() noexcept { | |||||
GELOGI("Finalize."); | |||||
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); | |||||
// caching and rdma allocator use memory allocator, so finalize them first | |||||
FinalizeAllocatorMap(caching_allocator_map_); | |||||
FinalizeAllocatorMap(rdma_allocator_map_); | |||||
FinalizeAllocatorMap(host_allocator_map_); | |||||
FinalizeAllocatorMap(memory_allocator_map_); | |||||
} | |||||
MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { | |||||
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); | |||||
MemoryAllocator *memory_allocator = nullptr; | |||||
auto it = memory_allocator_map_.find(memory_type); | |||||
if (it != memory_allocator_map_.end()) { | |||||
memory_allocator = it->second; | |||||
} | |||||
// Usually impossible | |||||
if (memory_allocator == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); | |||||
static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); | |||||
return &default_memory_allocator; | |||||
} | |||||
return memory_allocator; | |||||
} | |||||
CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) { | |||||
return Instance().GetAllocator(memory_type, caching_allocator_map_); | |||||
} | |||||
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) { | |||||
return Instance().GetAllocator(memory_type, rdma_allocator_map_); | |||||
} | |||||
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) { | |||||
return Instance().GetAllocator(memory_type, host_allocator_map_); | |||||
} | |||||
} // namespace ge | } // namespace ge |
@@ -26,7 +26,6 @@ | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/node.h" | #include "graph/node.h" | ||||
#include "runtime/mem.h" | #include "runtime/mem.h" | ||||
@@ -71,9 +70,9 @@ class MemoryAllocator { | |||||
/// @ingroup ge_graph | /// @ingroup ge_graph | ||||
/// @brief memory allocator init | /// @brief memory allocator init | ||||
/// @param [in] options user config params | /// @param [in] options user config params | ||||
/// @return void | |||||
/// @return Status of init | |||||
/// | /// | ||||
void Initialize(uint32_t device_id = 0); | |||||
Status Initialize(uint32_t device_id = 0); | |||||
/// | /// | ||||
/// @ingroup ge_graph | /// @ingroup ge_graph | ||||
@@ -136,109 +135,6 @@ class MemoryAllocator { | |||||
bool mem_malloced_; | bool mem_malloced_; | ||||
map<string, MemoryInfo> memory_base_map_; | map<string, MemoryInfo> memory_base_map_; | ||||
}; | }; | ||||
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | |||||
class CachingAllocator; | |||||
class RdmaPoolAllocator; | |||||
class MemManager { | |||||
public: | |||||
MemManager(); | |||||
virtual ~MemManager(); | |||||
static MemManager &Instance(); | |||||
static MemoryAllocator *Instance(rtMemType_t memory_type); | |||||
CachingAllocator &CachingInstance(rtMemType_t memory_type); | |||||
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | |||||
HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | |||||
MemManager(const MemManager &) = delete; | |||||
MemManager &operator=(const MemManager &) = delete; | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @brief memory allocator manager init | |||||
/// @param [in] options user config params | |||||
/// @return Status result of function | |||||
/// | |||||
Status Initialize(const std::vector<rtMemType_t> &memory_type); | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @brief memory allocator finalize | |||||
/// @return void | |||||
/// | |||||
void Finalize() noexcept; | |||||
private: | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @brief ge memory allocator | |||||
/// @param [in] memory_type memory type | |||||
/// @return MemoryAllocator ptr | |||||
/// | |||||
MemoryAllocator *GetMemoryAllocator(rtMemType_t memory_type); | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @param [in] memory_type memory type | |||||
/// @param [in] allocate_map memory allocator map | |||||
/// @return Status result of function | |||||
/// | |||||
template <typename T> | |||||
Status InitAllocator(const std::vector<rtMemType_t> &memory_type, std::map<rtMemType_t, T *> &allocate_map) { | |||||
T *allocator = nullptr; | |||||
for (unsigned int index : memory_type) { | |||||
auto it = allocate_map.find(index); | |||||
if (it == allocate_map.end()) { | |||||
allocator = new (std::nothrow) T(index); | |||||
if (allocator != nullptr) { | |||||
allocate_map[index] = allocator; | |||||
GELOGI("Create Allocator memory type[%u] success.", index); | |||||
} else { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); | |||||
} | |||||
} else { | |||||
allocator = it->second; | |||||
} | |||||
if (allocator == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} else { | |||||
if (allocator->Initialize() != SUCCESS) { | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @param [in] memory_type memory type | |||||
/// @param [in] allocate_map memory allocator map | |||||
/// @return Allocator ptr | |||||
/// | |||||
template <typename T> | |||||
T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> allocate_map) { | |||||
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); | |||||
T *allocator = nullptr; | |||||
auto it = allocate_map.find(memory_type); | |||||
if (it != allocate_map.end()) { | |||||
allocator = it->second; | |||||
} | |||||
// Usually impossible | |||||
if (allocator == nullptr) { | |||||
GELOGW("Get allocator failed, memory type is %u.", memory_type); | |||||
static T default_allocator(RT_MEMORY_RESERVED); | |||||
return default_allocator; | |||||
} | |||||
return *allocator; | |||||
} | |||||
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | |||||
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | |||||
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | |||||
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | |||||
std::recursive_mutex allocator_mutex_; | |||||
}; | |||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ | #endif // GE_GRAPH_MANAGER_GRAPH_MEM_ALLOCATOR_H_ |
@@ -0,0 +1,114 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include <string> | |||||
namespace ge { | |||||
// A freshly constructed manager owns no allocators; they are created lazily
// by Initialize().
MemManager::MemManager() {}

// Tear down every allocator map on destruction (Finalize() is idempotent).
MemManager::~MemManager() { Finalize(); }

// Process-wide singleton accessor; function-local static construction is
// thread-safe under C++11 semantics.
MemManager &MemManager::Instance() {
  static MemManager mem_manager;
  return mem_manager;
}
/// @brief Create and initialize one allocator of every kind for each
///        requested memory type. Safe to call repeatedly; only the first
///        successful call does work.
/// @param [in] memory_type memory types to initialize allocators for
/// @return SUCCESS, or the first allocator-creation error encountered
Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) {
  std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
  if (init_) {
    GELOGW("MemManager has been inited.");
    return SUCCESS;
  }
  auto ret = InitAllocator(memory_type, memory_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create MemoryAllocator failed.");
    return ret;
  }
  ret = InitAllocator(memory_type, caching_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create CachingAllocator failed.");
    return ret;
  }
  ret = InitAllocator(memory_type, rdma_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create RdmaAllocator failed.");
    return ret;
  }
  ret = InitAllocator(memory_type, host_allocator_map_);
  if (ret != SUCCESS) {
    GELOGE(ret, "Create HostMemAllocator failed.");
    return ret;
  }
  ret = InitAllocator(memory_type, session_scope_allocator_map_);
  if (ret != SUCCESS) {
    // Fixed copy-paste: this branch previously logged "HostMemAllocator".
    GELOGE(ret, "Create SessionScopeMemAllocator failed.");
    return ret;
  }
  // Record the initialized types so GetAllMemoryType() reflects reality;
  // without this, callers iterating GetAllMemoryType() (e.g. the per-session
  // free loop in InnerSession::Finalize) saw an empty list and never freed
  // session-scope memory.
  memory_type_ = memory_type;
  init_ = true;
  return SUCCESS;
}
template <typename T> | |||||
void FinalizeAllocatorMap(std::map<rtMemType_t, T *> &allocate_map) { | |||||
for (auto &allocator : allocate_map) { | |||||
if (allocator.second != nullptr) { | |||||
allocator.second->Finalize(); | |||||
delete allocator.second; | |||||
allocator.second = nullptr; | |||||
} | |||||
} | |||||
allocate_map.clear(); | |||||
} | |||||
// Destroy all allocators. The order matters: session-scope, caching and rdma
// allocators hand memory back to the plain MemoryAllocator, so
// memory_allocator_map_ must be finalized last.
void MemManager::Finalize() noexcept {
  GELOGI("Finalize.");
  std::lock_guard<std::recursive_mutex> lock(allocator_mutex_);
  // caching and rdma allocator use memory allocator, so finalize them first
  FinalizeAllocatorMap(session_scope_allocator_map_);
  FinalizeAllocatorMap(caching_allocator_map_);
  FinalizeAllocatorMap(rdma_allocator_map_);
  FinalizeAllocatorMap(host_allocator_map_);
  FinalizeAllocatorMap(memory_allocator_map_);
  // Allow a subsequent Initialize() to rebuild the allocator maps.
  init_ = false;
}
// Plain device-memory allocator for the given memory type.
MemoryAllocator &MemManager::MemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, memory_allocator_map_);
}

// Block-caching allocator for the given memory type.
CachingAllocator &MemManager::CachingInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, caching_allocator_map_);
}

// RDMA pool allocator for the given memory type.
RdmaPoolAllocator &MemManager::RdmaPoolInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, rdma_allocator_map_);
}

// Host (CPU) memory allocator for the given memory type.
HostMemAllocator &MemManager::HostMemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, host_allocator_map_);
}

// Session-scoped allocator: memory it hands out is freed per session id.
SessionScopeMemAllocator &MemManager::SessionScopeMemInstance(rtMemType_t memory_type) {
  return GetAllocator(memory_type, session_scope_allocator_map_);
}
} // namespace ge |
@@ -0,0 +1,141 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ | |||||
#define GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_ | |||||
#include <iostream> | |||||
#include <map> | |||||
#include <memory> | |||||
#include <mutex> | |||||
#include <string> | |||||
#include <vector> | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "framework/common/ge_inner_error_codes.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"
#include "graph/manager/session_scope_mem_allocator.h" | |||||
#include "graph/node.h" | |||||
#include "runtime/mem.h" | |||||
namespace ge { | |||||
using MemoryAllocatorPtr = std::shared_ptr<MemoryAllocator>; | |||||
class MemManager { | |||||
public: | |||||
MemManager(); | |||||
virtual ~MemManager(); | |||||
static MemManager &Instance(); | |||||
MemoryAllocator &MemInstance(rtMemType_t memory_type); | |||||
CachingAllocator &CachingInstance(rtMemType_t memory_type); | |||||
RdmaPoolAllocator &RdmaPoolInstance(rtMemType_t memory_type); | |||||
HostMemAllocator &HostMemInstance(rtMemType_t memory_type); | |||||
SessionScopeMemAllocator &SessionScopeMemInstance(rtMemType_t memory_type); | |||||
MemManager(const MemManager &) = delete; | |||||
MemManager &operator=(const MemManager &) = delete; | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @brief memory allocator manager init | |||||
/// @param [in] options user config params | |||||
/// @return Status result of function | |||||
/// | |||||
Status Initialize(const std::vector<rtMemType_t> &memory_type); | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @brief memory allocator finalize | |||||
/// @return void | |||||
/// | |||||
void Finalize() noexcept; | |||||
const std::vector<rtMemType_t> &GetAllMemoryType() const { return memory_type_; } | |||||
private: | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @param [in] memory_type memory type | |||||
/// @param [in] allocate_map memory allocator map | |||||
/// @return Status result of function | |||||
/// | |||||
template <typename T> | |||||
Status InitAllocator(const std::vector<rtMemType_t> &memory_type, std::map<rtMemType_t, T *> &allocate_map) { | |||||
T *allocator = nullptr; | |||||
for (unsigned int index : memory_type) { | |||||
auto it = allocate_map.find(index); | |||||
if (it == allocate_map.end()) { | |||||
allocator = new (std::nothrow) T(index); | |||||
if (allocator != nullptr) { | |||||
allocate_map[index] = allocator; | |||||
GELOGI("Create Allocator memory type[%u] success.", index); | |||||
} else { | |||||
REPORT_CALL_ERROR("E19999", "New MemoryAllocator fail, index:%u", index); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); | |||||
} | |||||
} else { | |||||
allocator = it->second; | |||||
} | |||||
if (allocator == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} else { | |||||
if (allocator->Initialize() != SUCCESS) { | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
/// | |||||
/// @ingroup ge_graph | |||||
/// @param [in] memory_type memory type | |||||
/// @param [in] allocate_map memory allocator map | |||||
/// @return Allocator ptr | |||||
/// | |||||
template <typename T> | |||||
T &GetAllocator(rtMemType_t memory_type, std::map<rtMemType_t, T *> allocate_map) { | |||||
std::lock_guard<std::recursive_mutex> lock(allocator_mutex_); | |||||
T *allocator = nullptr; | |||||
auto it = allocate_map.find(memory_type); | |||||
if (it != allocate_map.end()) { | |||||
allocator = it->second; | |||||
} | |||||
// Usually impossible | |||||
if (allocator == nullptr) { | |||||
GELOGW("Get allocator failed, memory type is %u.", memory_type); | |||||
static T default_allocator(RT_MEMORY_RESERVED); | |||||
return default_allocator; | |||||
} | |||||
return *allocator; | |||||
} | |||||
std::map<rtMemType_t, MemoryAllocator *> memory_allocator_map_; | |||||
std::map<rtMemType_t, CachingAllocator *> caching_allocator_map_; | |||||
std::map<rtMemType_t, RdmaPoolAllocator *> rdma_allocator_map_; | |||||
std::map<rtMemType_t, HostMemAllocator *> host_allocator_map_; | |||||
std::map<rtMemType_t, SessionScopeMemAllocator *> session_scope_allocator_map_; | |||||
std::recursive_mutex allocator_mutex_; | |||||
std::vector<rtMemType_t> memory_type_; | |||||
bool init_ = false; | |||||
}; | |||||
} // namespace ge | |||||
#endif // GE_GRAPH_MANAGER_GRAPH_MEM_MANAGER_H_
@@ -17,8 +17,7 @@ | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/rdma_pool_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
@@ -728,7 +727,7 @@ ge::Status VarManager::MallocVarMemory(size_t memory_size) { | |||||
var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; | var_memory_size = (var_memory_size + kSessionMemAlignSize - 1) / kSessionMemAlignSize * kSessionMemAlignSize; | ||||
const string purpose("variables and constant op memory in training network."); | const string purpose("variables and constant op memory in training network."); | ||||
var_mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, var_memory_size); | |||||
var_mem_base = MemManager::Instance().MemInstance(RT_MEMORY_HBM).MallocMemory(purpose, memory_key, var_memory_size); | |||||
if (var_mem_base == nullptr) { | if (var_mem_base == nullptr) { | ||||
GELOGE(ge::INTERNAL_ERROR, | GELOGE(ge::INTERNAL_ERROR, | ||||
"VarManager::MallocVarMemory failed " | "VarManager::MallocVarMemory failed " | ||||
@@ -745,7 +744,7 @@ uint8_t *VarManager::GetVarMemoryBase(rtMemType_t memory_type) { | |||||
return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); | return MemManager::Instance().RdmaPoolInstance(RT_MEMORY_HBM).GetRdmaBaseAddr(); | ||||
} | } | ||||
string memory_key = std::to_string(session_id_); | string memory_key = std::to_string(session_id_); | ||||
return MemManager::Instance(memory_type)->GetMemoryAddr(memory_key); | |||||
return MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(memory_key); | |||||
} | } | ||||
uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { | uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_type) { | ||||
@@ -754,7 +753,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty | |||||
return logic_addr; | return logic_addr; | ||||
} | } | ||||
string mem_key = std::to_string(session_id_); | string mem_key = std::to_string(session_id_); | ||||
uint8_t *mem_base = MemManager::Instance(memory_type)->GetMemoryAddr(mem_key); | |||||
uint8_t *mem_base = MemManager::Instance().MemInstance(memory_type).GetMemoryAddr(mem_key); | |||||
if (mem_base == nullptr) { | if (mem_base == nullptr) { | ||||
return nullptr; | return nullptr; | ||||
} | } | ||||
@@ -766,7 +765,7 @@ uint8_t *VarManager::GetVarMemoryAddr(uint8_t *logic_addr, rtMemType_t memory_ty | |||||
ge::Status VarManager::FreeVarMemory() { | ge::Status VarManager::FreeVarMemory() { | ||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | std::lock_guard<std::recursive_mutex> lock(mutex_); | ||||
string memory_key = std::to_string(SessionId()); | string memory_key = std::to_string(SessionId()); | ||||
return MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key); | |||||
return MemManager::Instance().MemInstance(RT_MEMORY_HBM).FreeMemory(memory_key); | |||||
} | } | ||||
ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | ge::Status VarManager::SetTransRoad(const std::string &var_name, const VarTransRoad &trans_road) { | ||||
@@ -19,7 +19,7 @@ | |||||
#include <memory> | #include <memory> | ||||
#include "common/ge/plugin_manager.h" | #include "common/ge/plugin_manager.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
#include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
@@ -20,6 +20,7 @@ | |||||
#include "framework/common/debug/ge_log.h" | #include "framework/common/debug/ge_log.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "runtime/dev.h" | #include "runtime/dev.h" | ||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace { | namespace { | ||||
const size_t kAlignedSize = 512; | const size_t kAlignedSize = 512; | ||||
@@ -49,7 +50,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) | |||||
})) {} | })) {} | ||||
Status RdmaPoolAllocator::Initialize() { | Status RdmaPoolAllocator::Initialize() { | ||||
memory_allocator_ = MemManager::Instance(memory_type_); | |||||
memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_); | |||||
if (memory_allocator_ == nullptr) { | if (memory_allocator_ == nullptr) { | ||||
return ACL_ERROR_GE_INTERNAL_ERROR; | return ACL_ERROR_GE_INTERNAL_ERROR; | ||||
} | } | ||||
@@ -0,0 +1,85 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph/manager/session_scope_mem_allocator.h" | |||||
#include <set> | |||||
#include <string> | |||||
#include <utility> | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace ge { | |||||
// Bind the allocator to one memory type; the backing MemoryAllocator is
// resolved later, in Initialize().
SessionScopeMemAllocator::SessionScopeMemAllocator(rtMemType_t memory_type)
    : memory_type_(memory_type), memory_allocator_(nullptr) {}
// Resolve the backing device-memory allocator for this memory type.
// Re-initialization first releases any memory still held from a prior run.
Status SessionScopeMemAllocator::Initialize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  // when redo Initialize free old memory
  FreeAllMemory();
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);
  return (memory_allocator_ == nullptr) ? ACL_ERROR_GE_INTERNAL_ERROR : ge::SUCCESS;
}
// Release every tracked block; device_id is only used for logging here.
void SessionScopeMemAllocator::Finalize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  FreeAllMemory();
}
uint8_t *SessionScopeMemAllocator::Malloc(size_t size, uint64_t session_id, uint32_t device_id) { | |||||
GELOGI("Start malloc memory, size:%zu, session id:%lu device id:%u", size, session_id, device_id); | |||||
const std::string purpose = "Memory for session scope."; | |||||
auto ptr = memory_allocator_->MallocMemory(purpose, size, device_id); | |||||
if (ptr == nullptr) { | |||||
GELOGE(ge::FAILED, "Malloc failed, no enough memory for size:%zu, session_id:%lu device_id:%u", size, | |||||
session_id, device_id); | |||||
return nullptr; | |||||
} | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
std::shared_ptr<uint8_t> mem_ptr(ptr, [&](uint8_t *p) { (void)memory_allocator_->FreeMemory(p); }); | |||||
allocated_memory_[session_id].emplace_back(size, mem_ptr); | |||||
return ptr; | |||||
} | |||||
// Drop every block recorded for the session; the shared_ptr deleters return
// the memory to the backing allocator as the records are destroyed.
Status SessionScopeMemAllocator::Free(uint64_t session_id, uint32_t device_id) {
  GELOGI("Free session:%lu memory, device id:%u.", session_id, device_id);
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  // erase-by-key: a zero count means nothing was ever allocated for this id.
  if (allocated_memory_.erase(session_id) == 0) {
    REPORT_INNER_ERROR("E19999", "Param memory not allocated before, session_id:%lu device_id:%u, check invalid",
                       session_id, device_id);
    GELOGE(PARAM_INVALID, "Invalid session_id");
    return ge::PARAM_INVALID;
  }
  return ge::SUCCESS;
}
void SessionScopeMemAllocator::FreeAllMemory() { | |||||
GELOGI("Free all memory"); | |||||
std::lock_guard<std::recursive_mutex> lock(mutex_); | |||||
for (auto &session_mem : allocated_memory_) { | |||||
session_mem.second.clear(); | |||||
} | |||||
allocated_memory_.clear(); | |||||
} | |||||
} // namespace ge |
@@ -0,0 +1,123 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ | |||||
#define GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ | |||||
#include <iostream> | |||||
#include <map> | |||||
#include <memory> | |||||
#include <mutex> | |||||
#include <string> | |||||
#include <vector> | |||||
#include <unordered_map> | |||||
#include <functional> | |||||
#include "framework/common/ge_inner_error_codes.h" | |||||
#include "graph/node.h" | |||||
#include "graph/manager/block_memory.h" | |||||
#include "runtime/mem.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
namespace ge { | |||||
// Record of one session-owned memory block: its size and a shared owner of
// the underlying buffer (the shared_ptr's deleter returns the memory).
class SessionScopeMemoryInfo {
 public:
  SessionScopeMemoryInfo(size_t size, const std::shared_ptr<uint8_t> &ptr) : size(size), ptr(ptr) {}
  SessionScopeMemoryInfo() = delete;
  virtual ~SessionScopeMemoryInfo() = default;
  // Defaulted copy operations are correct for (size_t, shared_ptr) members
  // and are self-assignment safe. The previous hand-written copy-assignment
  // operator fell off the end without `return *this;` — undefined behavior
  // for a function with a non-void return type.
  SessionScopeMemoryInfo(const SessionScopeMemoryInfo &other) = default;
  SessionScopeMemoryInfo &operator=(const SessionScopeMemoryInfo &other) = default;

 private:
  size_t size = 0;
  std::shared_ptr<uint8_t> ptr = nullptr;
};
// Allocator whose allocations are keyed by session id: every block handed out
// lives until Free(session_id) (or Finalize) releases the whole session.
class SessionScopeMemAllocator {
 public:
  explicit SessionScopeMemAllocator(rtMemType_t memory_type);
  SessionScopeMemAllocator(const SessionScopeMemAllocator &) = delete;
  SessionScopeMemAllocator &operator=(const SessionScopeMemAllocator &) = delete;
  virtual ~SessionScopeMemAllocator() = default;
  ///
  /// @ingroup ge_graph
  /// @brief session scope memory allocator init
  /// @param [in] device id
  /// @return Status of init
  ///
  Status Initialize(uint32_t device_id = 0);
  ///
  /// @ingroup ge_graph
  /// @brief memory allocator finalize, release all memory
  /// @return void
  ///
  void Finalize(uint32_t device_id = 0);
  ///
  /// @ingroup ge_graph
  /// @brief malloc memory
  /// @param [in] size memory size
  /// @param [in] session_id session id
  /// @param [in] device id
  /// @return memory address
  ///
  uint8_t *Malloc(size_t size, uint64_t session_id, uint32_t device_id = 0);
  ///
  /// @ingroup ge_graph
  /// @brief free memory
  /// @param [in] session_id session id
  /// @param [in] device_id device id
  /// @return Status result of function
  ///
  Status Free(uint64_t session_id, uint32_t device_id = 0);

 private:
  // Release every tracked block of every session.
  void FreeAllMemory();

 private:
  rtMemType_t memory_type_;
  // device memory allocator
  MemoryAllocator *memory_allocator_;
  // lock around all operations
  mutable std::recursive_mutex mutex_;
  // allocated blocks, keyed by session id
  std::unordered_map<uint64_t, std::vector<SessionScopeMemoryInfo>> allocated_memory_;
};
} // namespace ge | |||||
#endif // GE_GRAPH_MANAGER_SESSION_SCOPE_MEM_ALLOCATOR_H_ |
@@ -17,10 +17,7 @@ | |||||
#include "npu_memory_allocator.h" | #include "npu_memory_allocator.h" | ||||
#include <mutex> | #include <mutex> | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/rdma_pool_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -26,8 +26,7 @@ | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
#include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "hybrid/common/npu_memory_allocator.h" | #include "hybrid/common/npu_memory_allocator.h" | ||||
#include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
@@ -18,8 +18,7 @@ | |||||
#include "hybrid/node_executor/host_cpu/kernel_factory.h" | #include "hybrid/node_executor/host_cpu/kernel_factory.h" | ||||
#include "graph/passes/folding_pass.h" | #include "graph/passes/folding_pass.h" | ||||
#include "hybrid/model/hybrid_model.h" | #include "hybrid/model/hybrid_model.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/host_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "ge_local_engine/engine/host_cpu_engine.h" | #include "ge_local_engine/engine/host_cpu_engine.h" | ||||
namespace ge { | namespace ge { | ||||
@@ -39,7 +39,7 @@ | |||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/ge_global_options.h" | #include "graph/ge_global_options.h" | ||||
#include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/manager/host_mem_manager.h" | #include "graph/manager/host_mem_manager.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
@@ -32,6 +32,7 @@ | |||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "graph/load/model_manager/model_manager.h" | #include "graph/load/model_manager/model_manager.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
#include "runtime/mem.h" | #include "runtime/mem.h" | ||||
@@ -155,6 +156,11 @@ Status InnerSession::Finalize() { | |||||
// release var memory | // release var memory | ||||
GELOGI("VarManager free var memory."); | GELOGI("VarManager free var memory."); | ||||
(void)VarManager::Instance(session_id_)->FreeVarMemory(); | (void)VarManager::Instance(session_id_)->FreeVarMemory(); | ||||
for (auto memory_type : MemManager::Instance().GetAllMemoryType()) { | |||||
(void)MemManager::Instance().SessionScopeMemInstance(memory_type).Free(session_id_); | |||||
} | |||||
// release analyzer saved info(Session Level) | // release analyzer saved info(Session Level) | ||||
Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); | Analyzer::GetInstance()->DestroySessionJsonObject(session_id_); | ||||
@@ -19,8 +19,7 @@ | |||||
#include <mutex> | #include <mutex> | ||||
#include <string> | #include <string> | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
namespace ge { | namespace ge { | ||||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { | FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOpManager::~SingleOpManager() { | ||||
@@ -33,7 +33,7 @@ class GE_FUNC_VISIBILITY MemoryAssigner { | |||||
MemoryAssigner &operator=(const MemoryAssigner &) = delete; | MemoryAssigner &operator=(const MemoryAssigner &) = delete; | ||||
Status AssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||||
Status AssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_offset, size_t &zero_copy_mem_size); | |||||
private: | private: | ||||
ge::ComputeGraphPtr compute_graph_; | ge::ComputeGraphPtr compute_graph_; | ||||
@@ -337,8 +337,10 @@ set(COMMON_SRC_FILES | |||||
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | ||||
"${GE_CODE_DIR}/ge/graph/common/local_context.cc" | "${GE_CODE_DIR}/ge/graph/common/local_context.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" | |||||
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" | |||||
"${GE_CODE_DIR}/ge/common/dump/dump_op.cc" | "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" | ||||
"${GE_CODE_DIR}/ge/common/model_saver.cc" | "${GE_CODE_DIR}/ge/common/model_saver.cc" | ||||
"${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | "${GE_CODE_DIR}/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc" | ||||
@@ -396,8 +398,10 @@ set(GRAPH_LOAD_COMMON_SRC_FILES | |||||
"${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_var_manager.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | "${GE_CODE_DIR}/ge/graph/manager/trans_var_data_utils.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/graph_caching_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/session_scope_mem_allocator.cc" | |||||
"${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/rdma_pool_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | "${GE_CODE_DIR}/ge/graph/manager/host_mem_allocator.cc" | ||||
"${GE_CODE_DIR}/ge/graph/manager/graph_mem_manager.cc" | |||||
"${GE_CODE_DIR}/ge/common/thread_pool.cc" | "${GE_CODE_DIR}/ge/common/thread_pool.cc" | ||||
) | ) | ||||
@@ -792,6 +796,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
"graph/preprocess/graph_preprocess_unittest.cc" | "graph/preprocess/graph_preprocess_unittest.cc" | ||||
"graph/manager/hcom_util_unittest.cc" | "graph/manager/hcom_util_unittest.cc" | ||||
"graph/manager/graph_caching_allocator_unittest.cc" | "graph/manager/graph_caching_allocator_unittest.cc" | ||||
"graph/manager/session_scope_mem_allocator_unittest.cc" | |||||
"graph/manager/run_graph_unittest.cc" | "graph/manager/run_graph_unittest.cc" | ||||
"graph/partition/dynamic_shape_partition_unittest.cc" | "graph/partition/dynamic_shape_partition_unittest.cc" | ||||
"graph/manager/graph_manager_unittest.cc" | "graph/manager/graph_manager_unittest.cc" | ||||
@@ -824,6 +829,7 @@ set(PROFILING_MNG_TEST_FILES | |||||
set(HYBRID_TEST_FILES | set(HYBRID_TEST_FILES | ||||
"hybrid/ge_hybrid_unittest.cc" | "hybrid/ge_hybrid_unittest.cc" | ||||
"hybrid/known_node_executor_unittest.cc" | "hybrid/known_node_executor_unittest.cc" | ||||
"hybrid/executor/worker/execution_engine_unittest.cc" | |||||
"hybrid/executor/subgraph_executor_unittest.cc" | "hybrid/executor/subgraph_executor_unittest.cc" | ||||
"hybrid/executor/worker/execution_engine_unittest.cc" | "hybrid/executor/worker/execution_engine_unittest.cc" | ||||
"hybrid/model/hybrid_model_builder_unittest.cc" | "hybrid/model/hybrid_model_builder_unittest.cc" | ||||
@@ -44,7 +44,8 @@ using domi::GetContext; | |||||
class UtestMemoryAssignerTest : public testing::Test { | class UtestMemoryAssignerTest : public testing::Test { | ||||
public: | public: | ||||
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", int64_t size = 1024) { | |||||
ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some", | |||||
int64_t size = 1024) { | |||||
ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type); | ||||
auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | auto desc_temp_ptr = make_shared<ge::GeTensorDesc>(); | ||||
auto desc_temp = *desc_temp_ptr; | auto desc_temp = *desc_temp_ptr; | ||||
@@ -214,7 +215,8 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
return builder.GetGraph(); | return builder.GetGraph(); | ||||
} | } | ||||
void make_ffts_reuse_graph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, | |||||
void MakeFftsReuseGraph(ge::ComputeGraphPtr graph, int32_t thread_scope_id_1 = kInvalidThreadScopeId, | |||||
int32_t thread_scope_id_2 = kInvalidThreadScopeId) { | int32_t thread_scope_id_2 = kInvalidThreadScopeId) { | ||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | ||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | ||||
@@ -253,28 +255,119 @@ class UtestMemoryAssignerTest : public testing::Test { | |||||
graph->TopologicalSorting(); | graph->TopologicalSorting(); | ||||
} | } | ||||
void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | |||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | |||||
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); | |||||
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); | |||||
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); | |||||
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); | |||||
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); | |||||
std::vector<int64_t> workspace_bytes; | |||||
workspace_bytes.push_back(1024); | |||||
workspace_bytes.push_back(512); | |||||
op_def_c->SetWorkspaceBytes(workspace_bytes); | |||||
vector<int32_t> workspace_no_reuse_scope = { 0 , 1 }; | |||||
(void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
vector<int32_t> workspace_no_reuse_scope_e = { 1 }; | |||||
(void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
ge::NodePtr node_b = graph->AddNode(op_def_b); | |||||
ge::NodePtr node_c = graph->AddNode(op_def_c); | |||||
ge::NodePtr node_d = graph->AddNode(op_def_d); | |||||
ge::NodePtr node_e = graph->AddNode(op_def_e); | |||||
ge::NodePtr node_f = graph->AddNode(op_def_f); | |||||
ge::NodePtr node_g = graph->AddNode(op_def_g); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); | |||||
graph->TopologicalSorting(); | |||||
} | |||||
void MakeContinuousReuseGraph(ge::ComputeGraphPtr graph, bool nopading = false) { | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | |||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | |||||
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); | |||||
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); | |||||
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); | |||||
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); | |||||
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); | |||||
if (nopading) { | |||||
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); | |||||
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, true); | |||||
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_OUTPUT_REUSE_INPUT, true); | |||||
(void)ge::AttrUtils::SetInt(op_def_d, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, 0); | |||||
} else { | |||||
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_INPUT, true); | |||||
(void)ge::AttrUtils::SetBool(op_def_d, ATTR_NAME_CONTINUOUS_OUTPUT, true); | |||||
} | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
ge::NodePtr node_b = graph->AddNode(op_def_b); | |||||
ge::NodePtr node_c = graph->AddNode(op_def_c); | |||||
ge::NodePtr node_d = graph->AddNode(op_def_d); | |||||
ge::NodePtr node_e = graph->AddNode(op_def_e); | |||||
ge::NodePtr node_f = graph->AddNode(op_def_f); | |||||
ge::NodePtr node_g = graph->AddNode(op_def_g); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); | |||||
graph->TopologicalSorting(); | |||||
} | |||||
void MakeMultiBatchReuseGraph(ge::ComputeGraphPtr graph) { | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | |||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | |||||
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); | |||||
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); | |||||
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); | |||||
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512, "some", 2048UL); | |||||
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); | |||||
(void)ge::AttrUtils::SetStr(op_def_b, ATTR_NAME_BATCH_LABEL, "Batch_0"); | |||||
(void)ge::AttrUtils::SetStr(op_def_c, ATTR_NAME_BATCH_LABEL, "Batch_0"); | |||||
(void)ge::AttrUtils::SetStr(op_def_e, ATTR_NAME_BATCH_LABEL, "Batch_1"); | |||||
(void)ge::AttrUtils::SetStr(op_def_f, ATTR_NAME_BATCH_LABEL, "Batch_1"); | |||||
vector<int32_t> workspace_no_reuse_scope = { 1 }; | |||||
(void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
(void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
ge::NodePtr node_b = graph->AddNode(op_def_b); | |||||
ge::NodePtr node_c = graph->AddNode(op_def_c); | |||||
ge::NodePtr node_d = graph->AddNode(op_def_d); | |||||
ge::NodePtr node_e = graph->AddNode(op_def_e); | |||||
ge::NodePtr node_f = graph->AddNode(op_def_f); | |||||
ge::NodePtr node_g = graph->AddNode(op_def_g); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); | |||||
graph->TopologicalSorting(); | |||||
} | |||||
protected: | protected: | ||||
void SetUp() {} | void SetUp() {} | ||||
void TearDown() { GetContext().out_nodes_map.clear(); } | void TearDown() { GetContext().out_nodes_map.clear(); } | ||||
}; | }; | ||||
/* | |||||
TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
MemoryBlock* memory_block = new MemoryBlock(0); | |||||
memory_block->Init(1, kOutput, node_a, 0, 1); | |||||
memory_block->real_size_list_.clear(); | |||||
memory_block->Resize(); | |||||
EXPECT_EQ(memory_block->Size(), 0); | |||||
delete memory_block; | |||||
} | |||||
*/ | |||||
namespace ge { | namespace ge { | ||||
class MockBlockMemAssigner : public BlockMemAssigner { | class MockBlockMemAssigner : public BlockMemAssigner { | ||||
@@ -313,12 +406,44 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { | |||||
EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); | EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); | ||||
} | } | ||||
TEST_F(UtestMemoryAssignerTest, block_memory_assign_nopading_continuous_memory) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
MakeContinuousReuseGraph(graph, true); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 8192); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | |||||
TEST_F(UtestMemoryAssignerTest, block_memory_assign_continuous_memory) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
MakeContinuousReuseGraph(graph); | |||||
map<uint64_t, size_t> mem_offset; | |||||
size_t zero_copy_mem_size = 0; | |||||
MemoryAssigner memoryAssigner(graph); | |||||
ge::Status ret = memoryAssigner.AssignMemory(false, mem_offset, zero_copy_mem_size); | |||||
size_t offset = 0; | |||||
auto it = mem_offset.find(RT_MEMORY_HBM); | |||||
if (it != mem_offset.end()) { | |||||
offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 11264); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | |||||
TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { | TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) { | ||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
MakeGraph(graph); | MakeGraph(graph); | ||||
auto node_f = graph->FindNode("F"); | auto node_f = graph->FindNode("F"); | ||||
MemoryAssigner memory_assigner(graph); | MemoryAssigner memory_assigner(graph); | ||||
map<int64_t, size_t> mem_offset; | |||||
map<uint64_t, size_t> mem_offset; | |||||
size_t zero_memory_size = 0; | size_t zero_memory_size = 0; | ||||
EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | ||||
@@ -335,7 +460,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var) { | |||||
std::string value = "A"; | std::string value = "A"; | ||||
(void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); | (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); | ||||
MemoryAssigner memory_assigner(graph); | MemoryAssigner memory_assigner(graph); | ||||
map<int64_t, size_t> mem_offset; | |||||
map<uint64_t, size_t> mem_offset; | |||||
size_t zero_memory_size = 0; | size_t zero_memory_size = 0; | ||||
VarManager::Instance(0)->Init(0, 0, 0, 0); | VarManager::Instance(0)->Init(0, 0, 0, 0); | ||||
EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | ||||
@@ -356,7 +481,7 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_ref_var_not_found) { | |||||
std::string value = "M"; | std::string value = "M"; | ||||
(void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); | (void) ge::AttrUtils::SetStr(node_b->GetOpDesc()->MutableOutputDesc(0), REF_VAR_SRC_VAR_NAME, value); | ||||
MemoryAssigner memory_assigner(graph); | MemoryAssigner memory_assigner(graph); | ||||
map<int64_t, size_t> mem_offset; | |||||
map<uint64_t, size_t> mem_offset; | |||||
size_t zero_memory_size = 0; | size_t zero_memory_size = 0; | ||||
VarManager::Instance(0)->Init(0, 0, 0, 0); | VarManager::Instance(0)->Init(0, 0, 0, 0); | ||||
EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | EXPECT_NE(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS); | ||||
@@ -460,30 +585,86 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_atomic_output_and_workspace) | |||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { | TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_no_functinon_op) { | ||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
make_ffts_reuse_graph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); | |||||
MakeFftsReuseGraph(graph, kInvalidThreadScopeId, kInvalidThreadScopeId); | |||||
HybridMemAssigner hybridMemAssigner(graph); | HybridMemAssigner hybridMemAssigner(graph); | ||||
ge::Status ret = hybridMemAssigner.Assign(); | ge::Status ret = hybridMemAssigner.Assign(); | ||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 5120); | EXPECT_EQ(offset, 5120); | ||||
EXPECT_EQ(ret, SUCCESS); | EXPECT_EQ(ret, SUCCESS); | ||||
} | } | ||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { | TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_two_functinon_op) { | ||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
make_ffts_reuse_graph(graph, 0, 1); | |||||
MakeFftsReuseGraph(graph, 0, 1); | |||||
HybridMemAssigner hybridMemAssigner(graph); | HybridMemAssigner hybridMemAssigner(graph); | ||||
ge::Status ret = hybridMemAssigner.Assign(); | ge::Status ret = hybridMemAssigner.Assign(); | ||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 6656); | EXPECT_EQ(offset, 6656); | ||||
EXPECT_EQ(ret, SUCCESS); | EXPECT_EQ(ret, SUCCESS); | ||||
} | } | ||||
TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { | TEST_F(UtestMemoryAssignerTest, Mock_ffts_reuse_one_functinon_op) { | ||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | ||||
make_ffts_reuse_graph(graph, 0, kInvalidThreadScopeId); | |||||
MakeFftsReuseGraph(graph, 0, kInvalidThreadScopeId); | |||||
HybridMemAssigner hybridMemAssigner(graph); | HybridMemAssigner hybridMemAssigner(graph); | ||||
ge::Status ret = hybridMemAssigner.Assign(); | ge::Status ret = hybridMemAssigner.Assign(); | ||||
size_t offset = hybridMemAssigner.GetMemOffset(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 5632); | EXPECT_EQ(offset, 5632); | ||||
EXPECT_EQ(ret, SUCCESS); | EXPECT_EQ(ret, SUCCESS); | ||||
} | |||||
TEST_F(UtestMemoryAssignerTest, one_session_scope_op) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
MakeSessionScopeReuseGraph(graph); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); | |||||
size_t session_scope_offset = 0; | |||||
it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
session_scope_offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 5120); | |||||
EXPECT_EQ(session_scope_offset, 1536); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | |||||
TEST_F(UtestMemoryAssignerTest, multi_batch_reuse) { | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
MakeMultiBatchReuseGraph(graph); | |||||
HybridMemAssigner hybridMemAssigner(graph); | |||||
ge::Status ret = hybridMemAssigner.Assign(); | |||||
size_t offset = 0; | |||||
auto it = hybridMemAssigner.GetMemOffsets().find(RT_MEMORY_HBM); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
offset = it->second; | |||||
} | |||||
auto mem_type_session_scope = (kSessionScopeMemory | RT_MEMORY_HBM); | |||||
size_t session_scope_offset = 0; | |||||
it = hybridMemAssigner.GetMemOffsets().find(mem_type_session_scope); | |||||
if (it != hybridMemAssigner.GetMemOffsets().end()) { | |||||
session_scope_offset = it->second; | |||||
} | |||||
EXPECT_EQ(offset, 6656); | |||||
EXPECT_EQ(session_scope_offset, 1536); | |||||
EXPECT_EQ(ret, SUCCESS); | |||||
} | } |
@@ -30,6 +30,7 @@ | |||||
#define protected public | #define protected public | ||||
#define private public | #define private public | ||||
#include "graph/build/model_builder.h" | #include "graph/build/model_builder.h" | ||||
#include "memory/memory_assigner.h" | |||||
#undef protected | #undef protected | ||||
#undef private | #undef private | ||||
@@ -127,6 +128,41 @@ class UtestModelBuilderTest : public testing::Test { | |||||
graph->TopologicalSorting(); | graph->TopologicalSorting(); | ||||
} | } | ||||
void MakeSessionScopeReuseGraph(ge::ComputeGraphPtr graph) { | |||||
ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 512); | |||||
ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 0); | |||||
ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 512); | |||||
ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 512); | |||||
ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 1024); | |||||
ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 512); | |||||
ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 0); | |||||
std::vector<int64_t> workspace_bytes; | |||||
workspace_bytes.push_back(1024); | |||||
workspace_bytes.push_back(512); | |||||
op_def_c->SetWorkspaceBytes(workspace_bytes); | |||||
vector<int32_t> workspace_no_reuse_scope = { 0 , 1 }; | |||||
(void)ge::AttrUtils::SetListInt(op_def_c, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope); | |||||
vector<int32_t> workspace_no_reuse_scope_e = { 1 }; | |||||
(void)ge::AttrUtils::SetListInt(op_def_e, ATTR_NAME_WORKSPACE_MEMORY_NO_REUSE_SCOPE, workspace_no_reuse_scope_e); | |||||
ge::NodePtr node_a = graph->AddNode(op_def_a); | |||||
ge::NodePtr node_b = graph->AddNode(op_def_b); | |||||
ge::NodePtr node_c = graph->AddNode(op_def_c); | |||||
ge::NodePtr node_d = graph->AddNode(op_def_d); | |||||
ge::NodePtr node_e = graph->AddNode(op_def_e); | |||||
ge::NodePtr node_f = graph->AddNode(op_def_f); | |||||
ge::NodePtr node_g = graph->AddNode(op_def_g); | |||||
ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_c->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_d->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_e->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_f->GetInDataAnchor(0)); | |||||
ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_g->GetInDataAnchor(0)); | |||||
graph->TopologicalSorting(); | |||||
} | |||||
protected: | protected: | ||||
void SetUp() {} | void SetUp() {} | ||||
@@ -162,6 +198,24 @@ TEST_F(UtestModelBuilderTest, test_save_atomic_bin) { | |||||
EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); | EXPECT_EQ(builder.SaveAtomicTBEKernel(op_desc), SUCCESS); | ||||
} | } | ||||
TEST_F(UtestModelBuilderTest, build_model_for_get_task) { | |||||
Graph2SubGraphInfoList subgraphs; | |||||
std::map<std::string, int> stream_max_parallel_num; | |||||
ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>(""); | |||||
MakeSessionScopeReuseGraph(graph); | |||||
std::map<std::string, std::string> option; | |||||
ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false); | |||||
MemoryAssigner mem_assigner(graph); | |||||
EXPECT_EQ(mem_assigner.AssignMemory(false, builder.mem_type_to_mem_offset_, builder.zero_copy_mem_size_), SUCCESS); | |||||
ge::Model model; | |||||
EXPECT_EQ(builder.BuildModelDef(model), SUCCESS); | |||||
int64_t session_scope_mem_offset = 0; | |||||
ge::AttrUtils::GetInt(&model, ATTR_MODEL_SESSION_SCOPE_MEMORY_SIZE, session_scope_mem_offset); | |||||
EXPECT_EQ(session_scope_mem_offset, 1536); | |||||
} | |||||
TEST_F(UtestModelBuilderTest, test_model_save) { | TEST_F(UtestModelBuilderTest, test_model_save) { | ||||
Graph2SubGraphInfoList subgraphs; | Graph2SubGraphInfoList subgraphs; | ||||
std::map<std::string, int> stream_max_parallel_num; | std::map<std::string, int> stream_max_parallel_num; | ||||
@@ -43,6 +43,7 @@ | |||||
#include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "proto/ge_ir.pb.h" | #include "proto/ge_ir.pb.h" | ||||
#include "graph/manager/graph_var_manager.h" | |||||
#undef private | #undef private | ||||
#undef protected | #undef protected | ||||
@@ -194,6 +195,11 @@ TEST_F(UtestGeExecutor, kernel_ex_InitDumpTask) { | |||||
} | } | ||||
TEST_F(UtestGeExecutor, execute_graph_with_stream) { | TEST_F(UtestGeExecutor, execute_graph_with_stream) { | ||||
VarManager::Instance(0)->Init(0, 0, 0, 0); | |||||
map<string, string> options; | |||||
options[GRAPH_MEMORY_MAX_SIZE] = "1048576"; | |||||
VarManager::Instance(0)->SetMemoryMallocSize(options); | |||||
DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ||||
@@ -278,7 +284,6 @@ TEST_F(UtestGeExecutor, execute_graph_with_stream) { | |||||
OutputData output_data; | OutputData output_data; | ||||
vector<Tensor> outputs; | vector<Tensor> outputs; | ||||
EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); | EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS); | ||||
GraphExecutor graph_executer; | GraphExecutor graph_executer; | ||||
graph_executer.init_flag_ = true; | graph_executer.init_flag_ = true; | ||||
@@ -28,8 +28,7 @@ | |||||
#define protected public | #define protected public | ||||
#define private public | #define private public | ||||
#include "graph/manager/graph_caching_allocator.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#undef protected | #undef protected | ||||
#undef private | #undef private | ||||
@@ -0,0 +1,75 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include <gtest/gtest.h> | |||||
#include <memory> | |||||
#include "graph/anchor.h" | |||||
#include "graph/attr_value.h" | |||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "graph/utils/node_utils.h" | |||||
#include "graph/utils/op_desc_utils.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
#include "omg/omg_inner_types.h" | |||||
#define protected public | |||||
#define private public | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#undef protected | |||||
#undef private | |||||
using namespace std; | |||||
using namespace testing; | |||||
using namespace ge; | |||||
using domi::GetContext; | |||||
class UtestSessionScopeMemAllocator : public testing::Test { | |||||
protected: | |||||
void SetUp() {} | |||||
void TearDown() { GetContext().out_nodes_map.clear(); } | |||||
}; | |||||
TEST_F(UtestSessionScopeMemAllocator, initialize_success) { | |||||
std::vector<rtMemType_t> mem_type; | |||||
mem_type.push_back(RT_MEMORY_HBM); | |||||
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
MemManager::Instance().Finalize(); | |||||
} | |||||
TEST_F(UtestSessionScopeMemAllocator, malloc_success) { | |||||
std::vector<rtMemType_t> mem_type; | |||||
mem_type.push_back(RT_MEMORY_HBM); | |||||
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(1000, 0); | |||||
EXPECT_NE(nullptr, ptr); | |||||
MemManager::Instance().Finalize(); | |||||
} | |||||
TEST_F(UtestSessionScopeMemAllocator, free_success) { | |||||
std::vector<rtMemType_t> mem_type; | |||||
mem_type.push_back(RT_MEMORY_HBM); | |||||
EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); | |||||
uint8_t *ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); | |||||
EXPECT_NE(nullptr, ptr); | |||||
ptr = MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Malloc(100, 0); | |||||
EXPECT_NE(nullptr, ptr); | |||||
EXPECT_EQ(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); | |||||
EXPECT_NE(SUCCESS, MemManager::Instance().SessionScopeMemInstance(RT_MEMORY_HBM).Free(0)); | |||||
MemManager::Instance().Finalize(); | |||||
} |
@@ -35,7 +35,7 @@ | |||||
#include "graph/manager/graph_context.h" | #include "graph/manager/graph_context.h" | ||||
#include "graph/optimize/graph_optimize.h" | #include "graph/optimize/graph_optimize.h" | ||||
#include "graph/manager/util/variable_accelerate_ctrl.h" | #include "graph/manager/util/variable_accelerate_ctrl.h" | ||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/graph_mem_manager.h" | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph_builder_utils.h" | #include "graph_builder_utils.h" | ||||
#include "cce/dnn.h" | #include "cce/dnn.h" | ||||