You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

buffer_pool_mem_assigner.cc 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/buffer_pool_mem_assigner.h"
  17. #include "common/omg_util.h"
  18. #include "graph/utils/tensor_utils.h"
  19. #include "framework/common/util.h"
  20. #include "graph/compute_graph.h"
  21. #include "graph/debug/ge_attr_define.h"
  22. #include "common/math/math_util.h"
  23. #include "common/util/error_manager/error_manager.h"
  24. namespace ge {
  namespace {
  // Required number of entries in ATTR_NAME_BUFFER_POOL_NODE_SIZE_AND_OFFSET.
  constexpr size_t kBufferPoolNodeMemInfoLength = 2;
  // Position of the node's output size inside that attribute list.
  constexpr uint32_t kBufferPoolNodeOutputSizeIndex = 0;
  // Position of the node's logical offset inside that attribute list.
  constexpr uint32_t kBufferPoolNodeOutputOffsetIndex = 1;
  }  // namespace
  30. Status BufferPoolMemAssigner::Assign() {
  31. if (compute_graph_ == nullptr) {
  32. GELOGE(PARAM_INVALID, "[Check][Graph]Graph is nullptr");
  33. REPORT_INNER_ERROR("E19999", "Input graph is nullptr");
  34. return PARAM_INVALID;
  35. }
  36. Status ret = InitAssigner(compute_graph_);
  37. if (ret != SUCCESS) {
  38. GELOGE(FAILED, "[Init][Assigner]Graph:%s.", compute_graph_->GetName().c_str());
  39. return FAILED;
  40. }
  41. ret = AssignOutput();
  42. if (ret != SUCCESS) {
  43. GELOGE(FAILED, "[Assign][Output]Graph:%s.", compute_graph_->GetName().c_str());
  44. return FAILED;
  45. }
  46. return SUCCESS;
  47. }
  48. Status BufferPoolMemAssigner::GetOutputMemoryType(const NodePtr &node, size_t idx, int64_t &memory_type) {
  49. GE_CHECK_NOTNULL(node->GetOpDesc());
  50. memory_type = RT_MEMORY_HBM;
  51. std::vector<int64_t> type_list;
  52. bool has_mem_type = ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, type_list);
  53. if (has_mem_type && (type_list.size() != node->GetOpDesc()->GetOutputsSize() || idx >= type_list.size())) {
  54. GELOGE(PARAM_INVALID, "[Check][OutputParam]Output param invalid, output size:%zu, mem type size:%zu, index:%zu.",
  55. node->GetOpDesc()->GetOutputsSize(), type_list.size(), idx);
  56. REPORT_INNER_ERROR("E19999", "Output param invalid, output size:%zu, mem type size:%zu, index:%zu.",
  57. node->GetOpDesc()->GetOutputsSize(), type_list.size(), idx);
  58. return PARAM_INVALID;
  59. }
  60. memory_type = has_mem_type ? type_list[idx] : RT_MEMORY_HBM;
  61. return SUCCESS;
  62. }
  63. Status BufferPoolMemAssigner::InitAssigner(const ComputeGraphPtr &graph) {
  64. for (const NodePtr &node : graph->GetAllNodes()) {
  65. int64_t buffer_pool_id = 0;
  66. int64_t buffer_pool_size = 0;
  67. bool get_attr = AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_BUFFER_POOL_ID, buffer_pool_id);
  68. get_attr = get_attr && (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_BUFFER_POOL_SIZE, buffer_pool_size));
  69. if (get_attr) {
  70. std::string batch_label;
  71. (void) AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  72. buffer_pool_nodes_[batch_label][buffer_pool_id].emplace_back(node);
  73. auto iter = buffer_pool_size_[batch_label].find(buffer_pool_id);
  74. if (iter == buffer_pool_size_[batch_label].end()) {
  75. buffer_pool_size_[batch_label][buffer_pool_id] = buffer_pool_size;
  76. }
  77. Status ret = InitMemOffsetBase(node);
  78. if (ret != SUCCESS) {
  79. GELOGE(ret, "[Init][MemOffsetBase]Batch label:%s.", batch_label.c_str());
  80. REPORT_INNER_ERROR("E19999", "Failed to init offset base, batch label:%s.", batch_label.c_str());
  81. return ret;
  82. }
  83. }
  84. }
  85. int64_t max_size = 0;
  86. for (const auto &iter : buffer_pool_size_) {
  87. std::string batch_label = iter.first;
  88. int64_t batch_offset = mem_offset_base_;
  89. for (const auto &buffer_pool : iter.second) {
  90. int64_t buffer_pool_id = buffer_pool.first;
  91. int64_t buffer_pool_size = buffer_pool.second;
  92. buffer_pool_offset_base_[batch_label][buffer_pool_id] = batch_offset;
  93. FMK_INT64_ADDCHECK(buffer_pool_size, kBufferPoolMemAlignSize);
  94. AlignMemSize(buffer_pool_size, kBufferPoolMemAlignSize);
  95. FMK_INT64_ADDCHECK(batch_offset, (buffer_pool_size + kBufferPoolMemAlignSize));
  96. batch_offset += (buffer_pool_size + kBufferPoolMemAlignSize);
  97. }
  98. int64_t batch_mem_size = batch_offset - mem_offset_base_;
  99. GELOGI("[Init][Assigner]Get batch mem size, batch label:%s, mem size:%ld.", batch_label.c_str(), batch_mem_size);
  100. if (max_size < batch_mem_size) {
  101. max_size = batch_mem_size;
  102. }
  103. }
  104. FMK_INT64_ADDCHECK(mem_offset_base_, max_size);
  105. mem_offset_ = static_cast<size_t>(mem_offset_base_ + max_size);
  106. GELOGI("[Init][Assigner]Init buffer pool mem assigner successfully, "
  107. "mem type:%ld, mem offset base:%ld, mem offset:%zu.", mem_type_, mem_offset_base_, mem_offset_);
  108. return SUCCESS;
  109. }
  110. Status BufferPoolMemAssigner::InitMemOffsetBase(const NodePtr &node) {
  111. int64_t mem_type;
  112. Status ret = GetOutputMemoryType(node, static_cast<size_t>(kBufferPoolNodeOutIndex), mem_type);
  113. if (ret != SUCCESS) {
  114. GELOGE(ret, "[Get][MemType]Node:%s, index:%u.", node->GetName().c_str(), kBufferPoolNodeOutIndex);
  115. REPORT_INNER_ERROR("E19999", "Failed to get output memory type, node:%s, index:%u.",
  116. node->GetName().c_str(), kBufferPoolNodeOutIndex);
  117. return ret;
  118. }
  119. if (mem_type_ != mem_type && init_offset_base_) {
  120. GELOGE(PARAM_INVALID, "[Check][MemType]The memory type of all buffer pool nodes must be the same, node:%s, "
  121. "required:%ld, actually: %ld", node->GetName().c_str(), mem_type_, mem_type);
  122. REPORT_INNER_ERROR("E19999", "The memory type of all buffer pool nodes must be the same, node:%s, "
  123. "required:%ld, actually: %ld", node->GetName().c_str(), mem_type_, mem_type);
  124. return PARAM_INVALID;
  125. }
  126. if (!init_offset_base_) {
  127. auto iter = mem_type_to_offset_.find(mem_type);
  128. if (iter == mem_type_to_offset_.end()) {
  129. GELOGE(PARAM_INVALID, "[Check][MemType]Memory type is not supported, node:%s, mem type:%ld.",
  130. node->GetName().c_str(), mem_type);
  131. REPORT_INNER_ERROR("E19999", "Memory type is not supported, node:%s, mem type:%ld.",
  132. node->GetName().c_str(), mem_type);
  133. return PARAM_INVALID;
  134. }
  135. mem_offset_base_ = static_cast<int64_t>(iter->second);
  136. FMK_INT64_ADDCHECK(mem_offset_base_, (kBufferPoolMemAlignSize + kBufferPoolMemAlignSize));
  137. AlignMemSize(mem_offset_base_, kBufferPoolMemAlignSize);
  138. // The HCOM nodes may access the previous 512 bytes.
  139. mem_offset_base_ += kBufferPoolMemAlignSize;
  140. mem_type_ = mem_type;
  141. init_offset_base_ = true;
  142. GELOGI("[Init][MemOffsetBase]Init offset base:%ld, memory type:%ld", mem_offset_base_, mem_type);
  143. }
  144. return SUCCESS;
  145. }
  146. Status BufferPoolMemAssigner::AssignOutput() {
  147. for (auto &batch_pool_nodes_map : buffer_pool_nodes_) {
  148. std::string batch_label = batch_pool_nodes_map.first;
  149. for (auto &pool_nodes_map : batch_pool_nodes_map.second) {
  150. int64_t buffer_pool_id = pool_nodes_map.first;
  151. auto iter_buffer_id_size = buffer_pool_size_[batch_label].find(buffer_pool_id);
  152. if (iter_buffer_id_size == buffer_pool_size_[batch_label].end()) {
  153. GELOGE(INTERNAL_ERROR, "[Get][BufferPoolSize]Pool id:%ld.", buffer_pool_id);
  154. REPORT_INNER_ERROR("E19999", "Failed to get buffer pool size, pool id:%ld.", buffer_pool_id);
  155. return INTERNAL_ERROR;
  156. }
  157. auto iter_buffer_id_offset = buffer_pool_offset_base_[batch_label].find(buffer_pool_id);
  158. if (iter_buffer_id_offset == buffer_pool_offset_base_[batch_label].end()) {
  159. GELOGE(INTERNAL_ERROR, "[Get][BufferPoolBaseOffset]Pool id:%ld.", buffer_pool_id);
  160. REPORT_INNER_ERROR("E19999", "Failed to get buffer pool base offset, pool id:%ld.", buffer_pool_id);
  161. return INTERNAL_ERROR;
  162. }
  163. int64_t buffer_pool_size = iter_buffer_id_size->second;
  164. int64_t output_offset_base = iter_buffer_id_offset->second;
  165. Status ret = AssignOutputInOneBufferPool(batch_label, output_offset_base, pool_nodes_map.second);
  166. if (ret != SUCCESS) {
  167. GELOGE(ret, "[Assign][OneBufferPool]Batch label:%s, pool id:%ld, pool size:%ld, offset base:%ld.",
  168. batch_label.c_str(), buffer_pool_id, buffer_pool_size, output_offset_base);
  169. REPORT_INNER_ERROR("E19999", "Failed to assign output memory, batch label:%s, "
  170. "pool id:%ld, pool size:%ld, offset base:%ld.",
  171. batch_label.c_str(), buffer_pool_id, buffer_pool_size, output_offset_base);
  172. return ret;
  173. }
  174. GELOGI("[Assign][Output]Assign output successfully, batch label:%s, pool id:%ld, pool size:%ld, offset base:%ld.",
  175. batch_label.c_str(), buffer_pool_id, buffer_pool_size, output_offset_base);
  176. }
  177. }
  178. return SUCCESS;
  179. }
  180. Status BufferPoolMemAssigner::AssignOutputInOneBufferPool(const std::string &batch_label,
  181. int64_t output_offset_base,
  182. const std::vector<NodePtr> &buffer_pool_nodes) {
  183. for (const NodePtr &node : buffer_pool_nodes) {
  184. int64_t output_size = 0;
  185. Status ret = GetMemorySize(node, output_size);
  186. if (ret != SUCCESS) {
  187. GELOGE(ret, "[Get][MemSize]Node:%s.", node->GetName().c_str());
  188. REPORT_INNER_ERROR("E19999", "Failed to get output size, node:%s.", node->GetName().c_str());
  189. return ret;
  190. }
  191. OpDescPtr op_desc = node->GetOpDesc();
  192. GE_CHECK_NOTNULL(op_desc);
  193. vector<int64_t> memory_size_and_offset;
  194. bool get_attr = AttrUtils::GetListInt(op_desc, ATTR_NAME_BUFFER_POOL_NODE_SIZE_AND_OFFSET, memory_size_and_offset);
  195. if (!get_attr || memory_size_and_offset.size() != kBufferPoolNodeMemInfoLength) {
  196. GELOGE(PARAM_INVALID, "[Get][Attr]Node:%s, mem info size:%zu, required size:%zu.",
  197. node->GetName().c_str(), memory_size_and_offset.size(), kBufferPoolNodeMemInfoLength);
  198. REPORT_INNER_ERROR("E19999", "Failed to get pool node memory info, node:%s, info size:%zu, required size:%zu.",
  199. node->GetName().c_str(), memory_size_and_offset.size(), kBufferPoolNodeMemInfoLength);
  200. return PARAM_INVALID;
  201. }
  202. if (output_size != memory_size_and_offset[kBufferPoolNodeOutputSizeIndex]) {
  203. GELOGE(PARAM_INVALID, "[Check][MemSize]Something wrong with memory size, pre size:%ld, curr size:%ld, node:%s.",
  204. memory_size_and_offset[kBufferPoolNodeOutputSizeIndex], output_size, node->GetName().c_str());
  205. REPORT_INNER_ERROR("E19999", "Something wrong with memory size, pre size:%ld, curr size:%ld, node:%s.",
  206. memory_size_and_offset[kBufferPoolNodeOutputSizeIndex], output_size, node->GetName().c_str());
  207. return PARAM_INVALID;
  208. }
  209. int64_t logical_offset = memory_size_and_offset[kBufferPoolNodeOutputOffsetIndex];
  210. vector<int64_t> output_list = {(output_offset_base + logical_offset)};
  211. op_desc->SetOutputOffset(output_list);
  212. // log for IMAS tools
  213. GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] "
  214. "size[%zu] realsize[%zu] noalignsize[%zu] life time begin[%d] life time end[%d] "
  215. "child[%d:%d:%d:%d:%d] isref[%d] batch[%s]",
  216. compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(),
  217. "output", kBufferPoolNodeOutIndex, output_list[kBufferPoolNodeOutIndex], op_desc->GetStreamId(), mem_type_,
  218. static_cast<size_t>(output_size), static_cast<size_t>(output_size), static_cast<size_t>(output_size),
  219. 0, 0, 0, 0, 0, 0, 0, 0, batch_label.c_str());
  220. }
  221. return SUCCESS;
  222. }
  223. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,而用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。