You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

binary_block_mem_assigner.cc 5.0 kB

5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/binary_block_mem_assigner.h"
  17. #include <algorithm>
  18. #include "framework/common/debug/ge_log.h"
  19. #include "graph/utils/type_utils.h"
  namespace {
  // Factor applied to the running range ceiling for each successive range.
  constexpr uint32_t kRangeCeilInterval = 2;
  // Logarithm base used when deriving the number of ranges from the size spread.
  constexpr uint32_t kLogBase = 2;
  // Size threshold (8 MiB) from which a range counts as a "large block" range.
  constexpr int64_t kLargeBlockSize = 8 * 1024 * 1024;
  // Element count at which a large-block range is exempt from being merged.
  constexpr int64_t kLargeBlockRangeSize = 2;
  }  // namespace
  26. namespace ge {
  27. using std::vector;
  28. void BinaryBlockMemAssigner::PlanRanges(size_t range_number_limit, vector<vector<int64_t>> &ranges) {
  29. /// range delete and merge
  30. /// result after delete and merge is: [[6,12],[16,24,24],[30,32,48],[60,256]]
  31. bool changed = false;
  32. vector<int64_t> temp;
  33. do {
  34. changed = false;
  35. for (auto iter = ranges.begin(); iter != ranges.end();) {
  36. if (!temp.empty()) {
  37. iter->insert(iter->end(), temp.begin(), temp.end());
  38. temp.clear();
  39. }
  40. if (iter->empty()) {
  41. iter = ranges.erase(iter);
  42. changed = true;
  43. } else if ((iter->size() < range_number_limit) && (ranges.end() - iter > 1) &&
  44. !(iter->at(0) >= kLargeBlockSize && iter->size() >= kLargeBlockRangeSize)) {
  45. temp.insert(temp.end(), iter->begin(), iter->end());
  46. iter = ranges.erase(iter);
  47. changed = true;
  48. } else {
  49. ++iter;
  50. }
  51. }
  52. } while (changed);
  53. }
  54. ///
  55. /// @ingroup domi_omg
  56. /// @brief memory size fixed for reuse. this function determines memory types and sizes
  57. /// @param [out] range_ceils return memory sizes
  58. /// @return Status result
  59. /// @author
  60. ///
  61. Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
  62. vector<int64_t> all_memory_size;
  63. GetOutAndWorkSpaceMem(all_memory_size);
  64. if (all_memory_size.empty()) {
  65. GELOGW("Vector all_memory_size is empty!");
  66. return SUCCESS;
  67. }
  68. if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
  69. GELOGE(FAILED, "[check][mem_range_step]first mem_range_step:%ld less than 0,invalid,"
  70. "maybe has dynamic shape in graph", all_memory_size.front());
  71. REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid,"
  72. "maybe has dynamic shape in graph", all_memory_size.front());
  73. return FAILED;
  74. }
  75. // Memory size is 512 aligned, so it is not necessary to take less than 512
  76. int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front();
  77. auto range_number = static_cast<size_t>(
  78. ceil(log(all_memory_size.back() / static_cast<double>(min_memory_size)) / log(kLogBase)));
  79. range_number = (range_number == 0) ? 1 : range_number;
  80. GELOGD("Range number: %zu", range_number);
  81. vector<vector<int64_t>> ranges(range_number);
  82. GE_CHK_BOOL_EXEC((range_number != 0),
  83. REPORT_INNER_ERROR("E19999", "inner data[range_number] is 0, judge invalid");
  84. return PARAM_INVALID,
  85. "[check][range_number]inner data is 0, judge invalid.");
  86. size_t range_number_limit = all_memory_size.size() / range_number;
  87. int64_t range_ceil = min_memory_size;
  88. for (size_t i = 1; i <= range_number; i++) {
  89. GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
  90. GELOGE(FAILED, "[check][mem_range_ceil]Multiply result is out of range,"
  91. "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
  92. REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range,"
  93. "range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
  94. return FAILED);
  95. range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time.
  96. for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
  97. if (*iter <= range_ceil) {
  98. ranges[i - 1].push_back(*iter);
  99. iter = all_memory_size.erase(iter);
  100. } else {
  101. break;
  102. }
  103. }
  104. }
  105. GELOGD("Origin ranges:");
  106. for (auto &v : ranges) {
  107. GELOGD("__%s", ToString(v).c_str());
  108. }
  109. PlanRanges(range_number_limit, ranges);
  110. GELOGD("Origin ranges:");
  111. for (auto &v : ranges) {
  112. GELOGD("__%s", ToString(v).c_str());
  113. }
  114. for (auto &range : ranges) {
  115. std::sort(range.begin(), range.end());
  116. if (!range.empty()) {
  117. range_ceils.push_back(range.back());
  118. }
  119. }
  120. GELOGI("Range ceils: %s", ToString(range_ceils).c_str());
  121. return SUCCESS;
  122. }
  123. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。