You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

binary_block_mem_assigner.cc 4.3 kB

5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/binary_block_mem_assigner.h"
  17. #include <algorithm>
  18. #include "framework/common/debug/ge_log.h"
  19. #include "graph/utils/type_utils.h"
  namespace {
  // Factor applied to the running range ceiling for each successive range (the ceiling doubles every step).
  constexpr uint32_t kRangeCeilInterval = 2;
  // Base of the logarithm used to derive how many ranges span the smallest to the largest memory size.
  constexpr uint32_t kLogBase = 2;
  // A range whose first entry is at least this many bytes (8 MiB) is treated as a large-block range.
  constexpr int64_t kLargeBlockSize = 8 * 1024 * 1024;
  // A large-block range holding at least this many entries is not merged into its successor.
  constexpr int64_t kLargeBlockRangeSize = 2;
  }  // namespace
  26. namespace ge {
  27. using std::vector;
  28. void BinaryBlockMemAssigner::PlanRanges(size_t range_number_limit, vector<vector<int64_t>> &ranges) {
  29. /// range delete and merge
  30. /// result after delete and merge is: [[6,12],[16,24,24],[30,32,48],[60,256]]
  31. bool changed = false;
  32. vector<int64_t> temp;
  33. do {
  34. changed = false;
  35. for (auto iter = ranges.begin(); iter != ranges.end();) {
  36. if (!temp.empty()) {
  37. iter->insert(iter->end(), temp.begin(), temp.end());
  38. temp.clear();
  39. }
  40. if (iter->empty()) {
  41. iter = ranges.erase(iter);
  42. changed = true;
  43. } else if ((iter->size() < range_number_limit) && (ranges.end() - iter > 1) &&
  44. !(iter->at(0) >= kLargeBlockSize && iter->size() >= kLargeBlockRangeSize)) {
  45. temp.insert(temp.end(), iter->begin(), iter->end());
  46. iter = ranges.erase(iter);
  47. changed = true;
  48. } else {
  49. ++iter;
  50. }
  51. }
  52. } while (changed);
  53. }
  54. ///
  55. /// @ingroup domi_omg
  56. /// @brief memory size fixed for reuse. this function determines memory types and sizes
  57. /// @param [out] range_ceils return memory sizes
  58. /// @return Status result
  59. /// @author
  60. ///
  61. Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
  62. vector<int64_t> all_memory_size;
  63. GetOutAndWorkSpaceMem(all_memory_size);
  64. if (all_memory_size.empty()) {
  65. GELOGW("Vector all_memory_size is empty!");
  66. return SUCCESS;
  67. }
  68. if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) {
  69. GELOGE(FAILED, "dividend is 0!");
  70. return FAILED;
  71. }
  72. // Memory size is 512 aligned, so it is not necessary to take less than 512
  73. int64_t min_memory_size = (all_memory_size.back() > MEM_ALIGN_SIZE) ? MEM_ALIGN_SIZE : all_memory_size.front();
  74. auto range_number = static_cast<size_t>(
  75. ceil(log(all_memory_size.back() / static_cast<double>(min_memory_size)) / log(kLogBase)));
  76. range_number = (range_number == 0) ? 1 : range_number;
  77. GELOGD("Range number: %zu", range_number);
  78. vector<vector<int64_t>> ranges(range_number);
  79. GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
  80. size_t range_number_limit = all_memory_size.size() / range_number;
  81. int64_t range_ceil = min_memory_size;
  82. for (size_t i = 1; i <= range_number; i++) {
  83. GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
  84. GELOGE(FAILED, "Multiply result is out of range.");
  85. return FAILED);
  86. range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time.
  87. for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
  88. if (*iter <= range_ceil) {
  89. ranges[i - 1].push_back(*iter);
  90. iter = all_memory_size.erase(iter);
  91. } else {
  92. break;
  93. }
  94. }
  95. }
  96. GELOGD("Origin ranges:");
  97. for (auto &v : ranges) {
  98. GELOGD("__%s", ToString(v).c_str());
  99. }
  100. PlanRanges(range_number_limit, ranges);
  101. GELOGD("Origin ranges:");
  102. for (auto &v : ranges) {
  103. GELOGD("__%s", ToString(v).c_str());
  104. }
  105. for (auto &range : ranges) {
  106. std::sort(range.begin(), range.end());
  107. if (!range.empty()) {
  108. range_ceils.push_back(range.back());
  109. }
  110. }
  111. GELOGI("Range ceils: %s", ToString(range_ceils).c_str());
  112. return SUCCESS;
  113. }
  114. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：