You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

ts_mem_mall.h 3.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_LOAD_TS_MEM_MALL_H_
  17. #define GE_GRAPH_LOAD_TS_MEM_MALL_H_
  18. #include <mutex>
  19. #include <unordered_map>
  20. #include <memory>
  21. #include "runtime/base.h"
  22. #include "framework/common/debug/ge_log.h"
  namespace {
  // Requests whose rounded size exceeds this threshold (2 MiB) only trigger a warning where it is checked.
  constexpr uint32_t kMaxTsMemBlock = 2U << 20;

  // Allocation sizes are rounded up to a multiple of this value.
  // NOTE(review): the original comment said "64 bits", but the rounded value is named `bytes` at its
  // use site, so the unit is presumably bytes - confirm against the runtime allocator contract.
  constexpr uint32_t kTsMemAligment = 64U;

  // Low-bit mask (alignment - 1) used for the round-up arithmetic.
  constexpr uint32_t kTsMemAlignMask = kTsMemAligment - 1U;
  }  // namespace
  28. namespace ge {
  29. class TsMemMall {
  30. public:
  31. TsMemMall() {
  32. mem_type_ = RT_MEMORY_TS_4G;
  33. }
  34. TsMemMall(rtMemType_t type) {
  35. mem_type_ = type;
  36. }
  37. ~TsMemMall() {
  38. for (auto it : mem_store_size_) {
  39. rtError_t ret = rtFree(it.second);
  40. if (ret != RT_ERROR_NONE) {
  41. GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
  42. }
  43. }
  44. mem_store_size_.clear();
  45. mem_store_addr_.clear();
  46. }
  47. void *Acquire(int64_t offset, uint64_t size) {
  48. if (size == 0) {
  49. GELOGE(RT_FAILED, "Acquire mem block failed, size: %lu", size);
  50. return nullptr;
  51. }
  52. uint64_t bytes = (size + kTsMemAlignMask) & ~kTsMemAlignMask;
  53. if (bytes > kMaxTsMemBlock) {
  54. GELOGW("Acquire TS memory may not physical continuity, size: %lu", bytes);
  55. }
  56. std::lock_guard<std::mutex> lock(mem_mutex_);
  57. const auto it = mem_store_size_.find(offset);
  58. if (it != mem_store_size_.end()) {
  59. GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", it->second, offset, size, bytes);
  60. return it->second;
  61. }
  62. void *addr = nullptr;
  63. rtError_t rt_ret = rtMalloc(&addr, bytes, mem_type_);
  64. if (rt_ret != RT_ERROR_NONE) {
  65. GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
  66. return nullptr;
  67. }
  68. GELOGI("Acquire TS memory: %p, offset: %ld, size: %lu, align: %lu", addr, offset, size, bytes);
  69. mem_store_size_[offset] = addr;
  70. mem_store_addr_[addr] = offset;
  71. return addr;
  72. }
  73. void Release(void *addr) {
  74. std::lock_guard<std::mutex> lock(mem_mutex_);
  75. const auto it = mem_store_addr_.find(addr);
  76. if (it == mem_store_addr_.end()) {
  77. GELOGW("Not TS memory: %p.", addr);
  78. return;
  79. }
  80. GELOGI("Release TS memory: %p.", addr);
  81. mem_store_size_.erase(it->second);
  82. mem_store_addr_.erase(it);
  83. rtError_t ret = rtFree(addr);
  84. if (ret != RT_ERROR_NONE) {
  85. GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", ret);
  86. }
  87. }
  88. private:
  89. std::mutex mem_mutex_;
  90. std::unordered_map<int64_t, void *> mem_store_size_;
  91. std::unordered_map<void *, int64_t> mem_store_addr_;
  92. rtMemType_t mem_type_;
  93. };
  94. } // namespace ge
  95. #endif // GE_GRAPH_LOAD_TS_MEM_MALL_H_

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。