You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

zero_copy_offset.cc 10 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/model_manager/zero_copy_offset.h"
  17. #include "framework/common/debug/ge_log.h"
  18. #include "framework/common/util.h"
  19. #include "graph/load/model_manager/model_utils.h"
  20. #include "graph/load/model_manager/zero_copy_task.h"
  21. namespace ge {
  namespace {
  // Index used when reading the output offset list in ZeroCopyOffset::InitInputDataInfo.
  constexpr uint32_t kDataIndex = 0U;
  }  // namespace
  25. ZeroCopyOffset::ZeroCopyOffset() {}
  26. ZeroCopyOffset::~ZeroCopyOffset() {}
  27. Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc,
  28. bool &fusion_flag) {
  29. GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p",
  30. op_desc->GetName().c_str(), output_size, virtual_addr);
  31. basic_addr_ = virtual_addr;
  32. op_name_ = op_desc->GetName();
  33. (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_);
  34. (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_);
  35. GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(),
  36. REPORT_INNER_ERROR("E19999", "basic_offset_size:%zu not equal to relative_offset_size:%zu, "
  37. "check invalid", zero_copy_basic_offset_.size(),
  38. zero_copy_relative_offset_.size());
  39. return PARAM_INVALID,
  40. "[Check][Param] basic_offset_size:%zu should be equal to relative_offset_size:%zu",
  41. zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size());
  42. GELOGD("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size());
  43. int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex);
  44. IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag);
  45. uint32_t out_count = 0;
  46. data_size_ = output_size;
  47. if (!fusion_flag) {
  48. out_count++;
  49. data_info_.emplace_back(output_size, virtual_addr);
  50. relative_offset_.emplace_back(0);
  51. GELOGD("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size, virtual_addr);
  52. } else {
  53. GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str());
  54. for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) {
  55. if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) {
  56. out_count++;
  57. uint64_t out_offset = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_addr)) +
  58. zero_copy_relative_offset_.at(index);
  59. data_info_.emplace_back(output_size, reinterpret_cast<void *>(static_cast<uintptr_t>(out_offset)));
  60. relative_offset_.emplace_back(zero_copy_relative_offset_.at(index));
  61. GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %lu, need copy data_size is %ld.", basic_addr_,
  62. out_offset, output_size);
  63. }
  64. }
  65. }
  66. data_count_ = out_count;
  67. return SUCCESS;
  68. }
  69. Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list,
  70. const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc,
  71. const size_t &idx, bool &fusion_flag) {
  72. int64_t size = input_size_list[idx];
  73. auto tensor_desc = op_desc->GetInputDescPtr(idx);
  74. GE_CHECK_NOTNULL(tensor_desc);
  75. if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS) {
  76. REPORT_INNER_ERROR("E19999", "Get input TensorSize in op:%s(%s) failed, input_index:%zu",
  77. op_desc->GetName().c_str(), op_desc->GetType().c_str(), idx);
  78. GELOGE(FAILED, "[Get][InputTensorSize] in op:%s(%s) failed, input_index:%zu",
  79. op_desc->GetName().c_str(), op_desc->GetType().c_str(), idx);
  80. return FAILED;
  81. }
  82. GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size);
  83. basic_addr_ = virtual_addr_list[idx];
  84. op_name_ = op_desc->GetName();
  85. (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_);
  86. (void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_);
  87. GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(),
  88. REPORT_INNER_ERROR("E19999", "basic_offset_size:%zu not equal to relative_offset_size:%zu, "
  89. "check invalid",
  90. zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size());
  91. return PARAM_INVALID,
  92. "[Check][Param] basic_offset_size:%zu should be equal to relative_offset_size:%zu",
  93. zero_copy_basic_offset_.size(), zero_copy_relative_offset_.size());
  94. int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx);
  95. IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag);
  96. uint32_t in_count = 0;
  97. data_size_ = size;
  98. if (!fusion_flag) {
  99. in_count++;
  100. data_info_.emplace_back(size, virtual_addr_list[idx]);
  101. // op_desc not set l2fusion when fusion_flag is false
  102. relative_offset_.emplace_back(0);
  103. GELOGI("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), size, virtual_addr_list[idx]);
  104. } else {
  105. GELOGI("[ZCPY] set l2-fusion for %s.", op_desc->GetName().c_str());
  106. for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) {
  107. if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) {
  108. in_count++;
  109. uint64_t in_offset = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_addr_list[idx])) +
  110. zero_copy_relative_offset_.at(index);
  111. int64_t real_data_size = ModelUtils::GetInputSize(op_desc).at(idx);
  112. data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(static_cast<uintptr_t>(in_offset)));
  113. relative_offset_.emplace_back(zero_copy_relative_offset_.at(index));
  114. GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %lu, need copy data_size is %ld.", basic_addr_,
  115. in_offset, real_data_size);
  116. }
  117. }
  118. }
  119. data_count_ = in_count;
  120. return SUCCESS;
  121. }
  122. void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_offset,
  123. bool &fusion_flag) {
  124. for (size_t fusion_count = 0; fusion_count < fusion_basic_addrs.size(); ++fusion_count) {
  125. if (fusion_basic_addrs.at(fusion_count) == tensor_offset) {
  126. fusion_flag = true;
  127. break;
  128. }
  129. }
  130. }
  131. void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
  132. set<const void *> &real_virtual_addrs) {
  133. uint32_t out_count = 0;
  134. if (!fusion_flag) {
  135. out_count++;
  136. std::map<const void *, std::vector<void *>> addr_mapping;
  137. addr_mapping[addr] = {};
  138. outside_addrs_.emplace_back(addr_mapping);
  139. real_virtual_addrs.insert(addr);
  140. } else {
  141. GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
  142. for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
  143. if (zero_copy_basic_offset_.at(i) == output_offset) {
  144. out_count++;
  145. void *virtual_addr =
  146. reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(addr) + zero_copy_relative_offset_.at(i));
  147. std::map<const void *, std::vector<void *>> addr_mapping;
  148. addr_mapping[virtual_addr] = {};
  149. outside_addrs_.emplace_back(addr_mapping);
  150. real_virtual_addrs.insert(virtual_addr);
  151. GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr);
  152. }
  153. }
  154. }
  155. addr_count_ = out_count;
  156. valid_relative_offset_ = true;
  157. }
  158. void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
  159. std::vector<void *> &tensor_addrs) {
  160. GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr %p.", addr);
  161. uint32_t out_count = 0;
  162. if (!fusion_flag) {
  163. out_count++;
  164. std::map<const void *, std::vector<void *>> addr_mapping;
  165. addr_mapping[addr] = {};
  166. outside_addrs_.emplace_back(addr_mapping);
  167. tensor_addrs.emplace_back(addr);
  168. } else {
  169. GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
  170. for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
  171. if (zero_copy_basic_offset_.at(i) == input_offset) {
  172. out_count++;
  173. void *virtual_addr =
  174. reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(addr) + zero_copy_relative_offset_.at(i));
  175. std::map<const void *, std::vector<void *>> addr_mapping;
  176. addr_mapping[virtual_addr] = {};
  177. outside_addrs_.emplace_back(addr_mapping);
  178. tensor_addrs.emplace_back(virtual_addr);
  179. GELOGI("[ZCPY] virtual_addr %p has been fusion to virtual_addr %p.", addr, virtual_addr);
  180. }
  181. }
  182. }
  183. addr_count_ = out_count;
  184. valid_relative_offset_ = true;
  185. }
  186. void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
  187. if (!valid_relative_offset_) {
  188. return;
  189. }
  190. const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
  191. for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
  192. auto args_addrs = outside_addrs_[out_count].find(outside_addr);
  193. if (args_addrs != outside_addrs_[out_count].end()) {
  194. GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset),
  195. "[Set][TaskArgsOffset] failed, Input args invalid, offset:%zu.", offset);
  196. void *args_val = static_cast<uint8_t *>(args) + offset;
  197. args_addrs->second.push_back(args_val);
  198. GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
  199. args, offset);
  200. }
  201. }
  202. }
  203. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。