You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zero_copy_task.cc 4.5 kB

4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/new_model_manager/zero_copy_task.h"
  17. #include "framework/common/debug/ge_log.h"
  18. #include "framework/common/util.h"
  19. #include "graph/load/new_model_manager/model_utils.h"
  20. #include "common/ge_compiler_options.h"
  21. namespace ge {
  22. ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size)
  23. : name_(name), args_addr_(args), args_size_(size), is_updated_(false) {}
  24. ZeroCopyTask::~ZeroCopyTask() { args_addr_ = nullptr; }
  25. /**
  26. * @ingroup ge
  27. * @brief Set Task zero copy addr info.
  28. * @param [in] addr: task addr value.
  29. * @param [in] offset: saved offset in task args.
  30. * @return: 0 SUCCESS / others FAILED
  31. */
  32. Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) {
  33. if (offset + sizeof(uintptr_t) > args_size_) {
  34. GELOGE(FAILED, "[ZCPY] %s set task args failed, args size: %zu, offset: %zu", name_.c_str(), args_size_, offset);
  35. return FAILED; // unexpected error, need fix.
  36. }
  37. auto it = task_addr_offset_.find(addr);
  38. if (it == task_addr_offset_.end()) {
  39. task_addr_offset_[addr] = {offset};
  40. } else {
  41. it->second.insert(offset);
  42. }
  43. GELOGD("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr,
  44. args_addr_, args_size_, offset);
  45. return SUCCESS;
  46. }
  47. /**
  48. * @ingroup ge
  49. * @brief Save orignal data of task args.
  50. * @param [in] info: task args orignal data.
  51. * @param [in] size: args size.
  52. * @return: void
  53. */
  54. void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) {
  55. GE_CHECK_NOTNULL_JUST_RETURN(info);
  56. const uint8_t *data = static_cast<const uint8_t *>(info);
  57. args_info_.assign(data, data + size);
  58. GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info,
  59. args_addr_, args_size_, size);
  60. }
  61. /**
  62. * @ingroup ge
  63. * @brief Set user data addr to Task param.
  64. * @param [in] addr: virtual address value from Op.
  65. * @param [in] buffer_addr: real_data_buffer_addr from user.
  66. * @return: void
  67. */
  68. Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) {
  69. auto iter = task_addr_offset_.find(addr);
  70. if (iter != task_addr_offset_.end()) {
  71. auto &cur_pair = *iter;
  72. uint8_t *args_info = args_info_.data();
  73. for (auto offset : cur_pair.second) {
  74. auto dst_addr = static_cast<uint8_t *>(buffer_addr);
  75. GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p",
  76. name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr);
  77. *reinterpret_cast<uintptr_t *>(args_info + offset)= reinterpret_cast<uintptr_t>(dst_addr);
  78. is_updated_ = true;
  79. }
  80. }
  81. return SUCCESS;
  82. }
  83. /**
  84. * @ingroup ge
  85. * @brief Update task param to device.
  86. * @param [in] async_mode: true for asychronous mode.
  87. * @param [in] stream: Stream for asychronous update.
  88. * @return: 0 SUCCESS / others FAILED
  89. */
  90. Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) {
  91. if (!is_updated_) {
  92. return SUCCESS;
  93. }
  94. is_updated_ = false;
  95. GE_CHECK_NOTNULL(args_addr_);
  96. rtError_t rt_err = RT_ERROR_NONE;
  97. if (async_mode) {
  98. rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX,
  99. stream);
  100. } else {
  101. GE_BUILTIN_PREFETCH(args_addr_);
  102. rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE);
  103. }
  104. if (rt_err != RT_ERROR_NONE) {
  105. GELOGE(RT_FAILED, "[ZCPY] %s distribute task param failed, error=0x%x", name_.c_str(), rt_err);
  106. return RT_ERROR_TO_GE_STATUS(rt_err);
  107. }
  108. GELOGD("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(),
  109. args_addr_, args_size_, args_info_.data(), args_info_.size());
  110. return SUCCESS;
  111. }
  112. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示