You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

rdma_pool_allocator.cc 7.4 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/manager/rdma_pool_allocator.h"
  17. #include <framework/common/debug/log.h>
  18. #include "framework/common/debug/ge_log.h"
  19. #include "graph/ge_context.h"
  20. #include "runtime/dev.h"
  21. #include "graph/manager/graph_mem_manager.h"
  22. namespace {
  // Granularity, in bytes, that every requested size is rounded up to.
  const size_t kAlignedSize = 512;
  // Fraction of a free block's size at or below which a request causes the block to be split.
  const float kSplitThreshold = 0.5;

  // Rounds `size` up to the next multiple of kAlignedSize.
  //
  // A request of 0 bytes is treated as one full alignment unit. When rounding up
  // would exceed the range of size_t, the largest size_t value is returned instead
  // of a wrapped-around small number, so the result is never smaller than `size`.
  inline size_t GetAlignedBlockSize(size_t size) {
    if (size == 0) {
      return kAlignedSize;
    }
    const size_t remainder = size % kAlignedSize;
    if (remainder == 0) {
      return size;
    }
    const size_t padding = kAlignedSize - remainder;
    const size_t max_size = static_cast<size_t>(-1);
    // Saturate rather than wrap: unsigned overflow here would yield a tiny size.
    if (size > max_size - padding) {
      return max_size;
    }
    return size + padding;
  }
  31. inline bool ShouldSplit(const ge::Block *block, size_t size) {
  32. return static_cast<double>(size) <= (static_cast<double>(block->size) * kSplitThreshold);
  33. }
  34. inline bool CanMerge(ge::Block *block) { return block != nullptr && !block->allocated; }
  35. } // namespace
  36. namespace ge {
  37. RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type)
  38. : memory_type_(memory_type), block_bin_(BlockBin([](const Block *left, const Block *right) {
  39. if (left->size != right->size) {
  40. return left->size < right->size;
  41. }
  42. return reinterpret_cast<uintptr_t>(left->ptr) < reinterpret_cast<uintptr_t>(right->ptr);
  43. })) {}
  44. Status RdmaPoolAllocator::Initialize() {
  45. memory_allocator_ = &MemManager::Instance().MemInstance(memory_type_);
  46. if (memory_allocator_ == nullptr) {
  47. return ACL_ERROR_GE_INTERNAL_ERROR;
  48. }
  49. return ge::SUCCESS;
  50. }
  51. void RdmaPoolAllocator::Finalize() {
  52. GELOGD("Rdma pool finalize start.");
  53. for (auto it = allocated_blocks_.begin(); it != allocated_blocks_.end();) {
  54. auto block = it->second;
  55. it = allocated_blocks_.erase(it);
  56. delete block;
  57. }
  58. for (auto it = block_bin_.begin(); it != block_bin_.end();) {
  59. auto block = *it;
  60. it = block_bin_.erase(it);
  61. delete block;
  62. }
  63. if (rdma_base_addr_ != nullptr) {
  64. GELOGD("Start to free rdma pool memory.");
  65. if (memory_allocator_->FreeMemory(rdma_base_addr_) != SUCCESS) {
  66. GELOGW("Free rdma pool memory failed");
  67. }
  68. rdma_base_addr_ = nullptr;
  69. }
  70. }
  71. Status RdmaPoolAllocator::InitMemory(size_t mem_size) {
  72. auto device_id = GetContext().DeviceId();
  73. GELOGD("Init Rdma Memory with size [%zu] for devid:[%u]", mem_size, device_id);
  74. if (rdma_base_addr_ != nullptr) {
  75. REPORT_INNER_ERROR("E19999", "Param rdma_base_addr_ is nullptr, check invalid");
  76. GELOGE(GE_MULTI_INIT, "Rdma pool has been malloced");
  77. return GE_MULTI_INIT;
  78. }
  79. const std::string purpose = "Memory for rdma pool.";
  80. std::lock_guard<std::recursive_mutex> lock(mutex_);
  81. auto dev_id = static_cast<int32_t>(device_id);
  82. GE_CHK_RT_RET(rtSetDevice(dev_id));
  83. // DeviceReset before memory finished!
  84. GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(dev_id)); });
  85. rdma_base_addr_ = memory_allocator_->MallocMemory(purpose, mem_size, device_id);
  86. if (rdma_base_addr_ == nullptr) {
  87. GELOGE(GE_GRAPH_MALLOC_FAILED, "Rdma pool memory malloc failed");
  88. return GE_GRAPH_MALLOC_FAILED;
  89. }
  90. rdma_mem_size_ = mem_size;
  91. // Init with a base block.
  92. auto *base_block = new (std::nothrow) Block(device_id, mem_size, rdma_base_addr_);
  93. if (base_block == nullptr) {
  94. REPORT_CALL_ERROR("E19999", "New Block failed, device_id:%u", device_id);
  95. GELOGE(GE_GRAPH_MALLOC_FAILED, "Block malloc failed");
  96. return GE_GRAPH_MALLOC_FAILED;
  97. }
  98. block_bin_.insert(base_block);
  99. return SUCCESS;
  100. }
  101. uint8_t *RdmaPoolAllocator::Malloc(size_t size, uint32_t device_id) {
  102. GELOGI("start to malloc rdma memory size:%zu, device id = %u", size, device_id);
  103. auto aligned_size = GetAlignedBlockSize(size);
  104. Block key(device_id, aligned_size, nullptr);
  105. std::lock_guard<std::recursive_mutex> lock(mutex_);
  106. auto it = block_bin_.lower_bound(&key);
  107. if (it != block_bin_.end()) {
  108. Block *block = *it;
  109. block_bin_.erase(it);
  110. block->allocated = true;
  111. if (block->ptr == nullptr) {
  112. REPORT_INNER_ERROR("E19999", "Rdmapool memory address is nullptr, device_id:%u, check invalid",
  113. device_id);
  114. GELOGE(INTERNAL_ERROR, "Rdmapool memory address is nullptr.");
  115. return nullptr;
  116. }
  117. allocated_blocks_.emplace(block->ptr, block);
  118. if (ShouldSplit(block, aligned_size)) {
  119. GELOGD("Block will be splited block size = %zu, aligned_size:%zu", block->size, aligned_size);
  120. auto *new_block =
  121. new (std::nothrow) Block(device_id, block->size - aligned_size, nullptr, block->ptr + aligned_size);
  122. if (new_block == nullptr) {
  123. GELOGW("Block split failed");
  124. return block->ptr;
  125. }
  126. new_block->next = block->next;
  127. if (block->next != nullptr) {
  128. block->next->prev = new_block;
  129. }
  130. new_block->prev = block;
  131. block->next = new_block;
  132. block->size = aligned_size;
  133. block_bin_.insert(new_block);
  134. }
  135. GELOGD("Find block size = %zu", block->size);
  136. return block->ptr;
  137. }
  138. GELOGW("Memory block not founded.");
  139. return nullptr;
  140. }
  141. Status RdmaPoolAllocator::Free(uint8_t *memory_addr, uint32_t device_id) {
  142. GELOGI("Free rdma memory, device id = %u", device_id);
  143. if (memory_addr == nullptr) {
  144. REPORT_INNER_ERROR("E19999", "Param memory_addr is nullptr, device_id:%u, check invalid",
  145. device_id);
  146. GELOGE(GE_GRAPH_FREE_FAILED, "Invalid memory pointer");
  147. return GE_GRAPH_FREE_FAILED;
  148. }
  149. std::lock_guard<std::recursive_mutex> lock(mutex_);
  150. auto it = allocated_blocks_.find(memory_addr);
  151. if (it == allocated_blocks_.end()) {
  152. REPORT_INNER_ERROR("E19999", "Param memory_addr is not allocated before, device_id:%u, "
  153. "check invalid", device_id);
  154. GELOGE(PARAM_INVALID, "Invalid memory pointer");
  155. return PARAM_INVALID;
  156. }
  157. Block *block = it->second;
  158. block->allocated = false;
  159. allocated_blocks_.erase(it);
  160. Block *merge_blocks[] = {block->prev, block->next};
  161. for (Block *merge_block : merge_blocks) {
  162. MergeBlocks(block, merge_block);
  163. }
  164. block_bin_.insert(block);
  165. return SUCCESS;
  166. }
  167. void RdmaPoolAllocator::MergeBlocks(Block *dst, Block *src) {
  168. if (!CanMerge(dst) || !CanMerge(src)) {
  169. return;
  170. }
  171. if (dst->prev == src) {
  172. dst->ptr = src->ptr;
  173. dst->prev = src->prev;
  174. if (dst->prev != nullptr) {
  175. dst->prev->next = dst;
  176. }
  177. } else {
  178. dst->next = src->next;
  179. if (dst->next != nullptr) {
  180. dst->next->prev = dst;
  181. }
  182. }
  183. dst->size += src->size;
  184. block_bin_.erase(src);
  185. delete src;
  186. }
  187. Status RdmaPoolAllocator::GetBaseAddr(uint64_t &base_addr, uint64_t &mem_size) {
  188. if (rdma_base_addr_ == nullptr) {
  189. REPORT_INNER_ERROR("E19999", "Param rdma_base_addr_ is nullptr, check invalid");
  190. GELOGE(INTERNAL_ERROR, "Rdma base addr is nullptr.");
  191. return INTERNAL_ERROR;
  192. }
  193. base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(rdma_base_addr_));
  194. mem_size = rdma_mem_size_;
  195. return SUCCESS;
  196. }
  197. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示