You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graph_caching_allocator.h 6.2 kB

4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_
  17. #define GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_
  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <map>
  #include <memory>
  #include <mutex>
  #include <set>
  #include <string>
  #include <unordered_map>
  #include <unordered_set>
  #include <vector>
  #include "framework/common/debug/ge_log.h"
  #include "framework/common/ge_inner_error_codes.h"
  #include "graph/manager/block_memory.h"
  #include "graph/node.h"
  #include "runtime/mem.h"
  32. namespace ge {
  // All block sizes are rounded to at least 512 bytes.
  constexpr size_t kRoundBlockSize = 512;

  // Bin size units. NOTE(review): presumably combined with the byte-size
  // multipliers below to form the per-bin size thresholds -- confirm against
  // the implementation file.
  constexpr size_t kBinSizeUnit4 = 4;
  constexpr size_t kBinSizeUnit8 = 8;
  constexpr size_t kBinSizeUnit32 = 32;
  constexpr size_t kBinSizeUnit128 = 128;
  constexpr size_t kBinSizeUnit256 = 256;
  constexpr size_t kBinSizeUnit512 = 512;

  // Split when malloc size <= small block size * kSplitThreshold.
  constexpr double kSplitThreshold = 0.5;

  // Byte-size multipliers.
  constexpr size_t kKByteSize = 1024;
  constexpr size_t kMByteSize = 1048576;     // 1024 * 1024
  constexpr size_t kGByteSize = 1073741824;  // 1024 * 1024 * 1024

  // Number of entries in CachingAllocator::free_block_bins_.
  // constexpr (rather than static const) for consistency with the constants
  // above; linkage and usability as an array bound are unchanged.
  constexpr uint32_t kNumBins = 7;

  // Forward declaration: CachingAllocator only holds a pointer to it.
  class MemoryAllocator;
  46. class CachingAllocator {
  47. public:
  48. explicit CachingAllocator(rtMemType_t memory_type);
  49. CachingAllocator(const CachingAllocator &) = delete;
  50. CachingAllocator &operator=(const CachingAllocator &) = delete;
  51. virtual ~CachingAllocator() = default;
  52. ///
  53. /// @ingroup ge_graph
  54. /// @brief caching allocator init
  55. /// @param [in] device id
  56. /// @return Status of init
  57. ///
  58. Status Initialize(uint32_t device_id = 0);
  59. ///
  60. /// @ingroup ge_graph
  61. /// @brief memory allocator finalize, release cached memory
  62. /// @return void
  63. ///
  64. void Finalize(uint32_t device_id = 0);
  65. ///
  66. /// @ingroup ge_graph
  67. /// @brief malloc memory
  68. /// @param [in] size memory size
  69. /// @param [in] try to reuse the same memory
  70. /// @param [in] device id
  71. /// @return memory address
  72. ///
  73. uint8_t *Malloc(size_t size, uint8_t *org_ptr = nullptr, uint32_t device_id = 0);
  74. ///
  75. /// @ingroup ge_graph
  76. /// @brief free memory
  77. /// @param [in] memory_ptr memory address ptr
  78. /// @param [in] device_id device id
  79. /// @return Status result of function
  80. ///
  81. Status Free(uint8_t *memory_addr, uint32_t device_id = 0);
  82. ///
  83. /// @ingroup ge_graph
  84. /// @brief try to free memory when no memory is referenced
  85. /// @return void
  86. ///
  87. void TryFreeBlocks();
  88. void TryFreeUnusedBlocks()
  89. private:
  90. ///
  91. /// @ingroup ge_graph
  92. /// @brief extend cache by size
  93. /// @param [in] memory size
  94. /// @param [in] device id
  95. /// @return Status result of function
  96. ///
  97. Status TryExtendCache(size_t size, uint32_t device_id);
  98. ///
  99. /// @ingroup ge_graph
  100. /// @brief find free block by size
  101. /// @param [in] memory size
  102. /// @param [in] device_id device id
  103. /// @return block ptr
  104. ///
  105. Block *FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t device_id);
  106. ///
  107. /// @ingroup ge_graph
  108. /// @brief get the right bin based on size
  109. /// @param [in] original malloc size
  110. /// @return block bin
  111. ///
  112. BlockBin *GetBlockBin(size_t size);
  113. ///
  114. /// @ingroup ge_graph
  115. /// @brief add memory to right bin based on size
  116. /// @param [in] memory ptr
  117. /// @param [in] memory size
  118. /// @param [in] device_id device id
  119. /// @return Status result of function
  120. ///
  121. Status AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id);
  122. ///
  123. /// @ingroup ge_graph
  124. /// @brief free block to right bin
  125. /// @param [in] block ptr
  126. /// @return void
  127. ///
  128. void FreeBlock(Block* block);
  129. ///
  130. /// @ingroup ge_graph
  131. /// @brief free all cached blocks to right bin and release the memory when memory is not enough
  132. /// @return free cached memory size
  133. ///
  134. size_t FreeCachedBlocks();
  135. ///
  136. /// @ingroup ge_graph
  137. /// @brief free allocated and cached blocks and release the memory when process exit
  138. /// @return void
  139. ///
  140. void FreeBlocks();
  141. ///
  142. /// @ingroup ge_graph
  143. /// @brief free block bins when process exit
  144. /// @return void
  145. ///
  146. void FreeBlockBins();
  147. ///
  148. /// @ingroup ge_graph
  149. /// @brief If a split block is freed, try merging with the original block
  150. /// @param [inout] dest block ptr
  151. /// @param [in] src block ptr
  152. /// @param [out] block bin
  153. /// @return void
  154. ///
  155. void MergeBlocks(Block *dst, Block *src, BlockBin &bin);
  156. ///
  157. /// @ingroup ge_graph
  158. /// @brief If the allocated memory size is too much smaller than the memory block, try to split the memory block
  159. /// @param [in] original block ptr
  160. /// @param [in] allocated memory size
  161. /// @param [in] block bin
  162. /// @param [in] device id
  163. /// @return splited block ptr
  164. ///
  165. Block *SplitBlock(Block *block, size_t size, BlockBin &bin, uint32_t device_id);
  166. ///
  167. /// @ingroup ge_graph
  168. /// @brief print the memory info in pool
  169. /// @param [in] log level
  170. /// @return void
  171. ///
  172. void PrintStatics(int32_t level = DLOG_INFO);
  173. private:
  174. rtMemType_t memory_type_;
  175. // device memory allocator
  176. MemoryAllocator *memory_allocator_;
  177. // lock around all operations
  178. mutable std::recursive_mutex mutex_;
  179. // allocated blocks by memory pointer
  180. std::unordered_map<uint8_t *, Block *> allocated_blocks_;
  181. // block bins by different block size
  182. BlockBin *free_block_bins_[kNumBins];
  183. // malloced memorys from device
  184. std::map<size_t, size_t> malloced_memory_;
  185. //user call Malloc total counts
  186. std::atomic<size_t> called_malloc_counts_;
  187. //user call Free total counts
  188. std::atomic<size_t> called_free_counts_;
  189. };
  190. } // namespace ge
  191. #endif // GE_GRAPH_MANAGER_GRAPH_CACHING_ALLOCATOR_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示