You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hccl_task.cc 9.9 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "ge_runtime/task/hccl_task.h"
  17. #include <algorithm>
  18. #include "ge_runtime/task/task_factory.h"
  19. #include "common/opskernel/ops_kernel_info_store.h"
  20. #include "common/opskernel/ge_task_info.h"
  21. namespace ge {
  22. namespace model_runner {
  23. std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<HcclTask::StreamGuard>>>>
  24. HcclTask::model_stream_mapping_;
  25. std::mutex HcclTask::model_stream_mapping_mutex_;
  26. HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info)
  27. : TaskRepeater<HcclTaskInfo>(model_context, task_info),
  28. task_info_(task_info),
  29. stream_(nullptr),
  30. workspace_mem_(nullptr),
  31. rt_model_handle_(nullptr),
  32. priority_(0),
  33. secondary_stream_list_() {
  34. if (task_info_ == nullptr) {
  35. GELOGW("task_info_ is null!");
  36. return;
  37. }
  38. priority_ = model_context.priority();
  39. rt_model_handle_ = model_context.rt_model_handle();
  40. auto stream_list = model_context.stream_list();
  41. if (stream_list.size() == 1) {
  42. stream_ = stream_list[0];
  43. } else if (stream_list.size() > task_info->stream_id()) {
  44. stream_ = stream_list[task_info->stream_id()];
  45. } else {
  46. GELOGW("Index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size());
  47. }
  48. }
  49. HcclTask::~HcclTask() {}
  50. bool HcclTask::Distribute() {
  51. // Ops kernel info store
  52. // Get privateDef and opsKernelStorePtr
  53. GELOGI("Get custom info in modelTaskDef");
  54. void *ops_kernel_store = task_info_->ops_kernel_store();
  55. OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store);
  56. if (ops_kernel_store == nullptr) {
  57. GELOGE(PARAM_INVALID, "No hcom distribute function ptr and no ops kernel store.");
  58. return false;
  59. }
  60. char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data()));
  61. auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size());
  62. GELOGI("The first address of the custom info, privateDef=%p", private_def);
  63. SetSecondaryStream();
  64. if (task_info_->workspace_size() > 0) {
  65. workspace_mem_ = task_info_->workspace_addr();
  66. }
  67. GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
  68. GETaskInfo ge_task;
  69. ge_task.id = 0;
  70. ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL);
  71. ge_task.stream = stream_;
  72. ge_task.kernelHcclInfo = std::vector<GETaskKernelHcclInfo>(1);
  73. ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type();
  74. ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr();
  75. ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr();
  76. ge_task.kernelHcclInfo[0].workSpaceAddr = workspace_mem_;
  77. ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size();
  78. ge_task.kernelHcclInfo[0].count = task_info_->count();
  79. ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type());
  80. ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type());
  81. ge_task.kernelHcclInfo[0].rootId = task_info_->root_id();
  82. std::vector<rtStream_t> secondary_stream_list;
  83. std::transform(secondary_stream_list_.begin(), secondary_stream_list_.end(),
  84. std::back_inserter(secondary_stream_list),
  85. [](const std::shared_ptr<StreamGuard> &stream) -> rtStream_t { return stream->GetStream(); });
  86. ge_task.kernelHcclInfo[0].hcclStreamList = secondary_stream_list;
  87. ge_task.privateDef = private_def;
  88. ge_task.privateDefLen = private_def_len;
  89. ge_task.opsKernelStorePtr = ops_kernel_store;
  90. auto result = ops_kernel_info_store->LoadTask(ge_task);
  91. // tagHcclResult::HCCL_SUCCESS is 0
  92. if (result != 0) {
  93. GELOGE(INTERNAL_ERROR, "davinci_model : load task fail, return ret: %u", result);
  94. return false;
  95. }
  96. GELOGI("Call function LoadTask end.");
  97. return true;
  98. }
  99. bool HcclTask::SetSecondaryStream() {
  100. const uint32_t master_stream_id = task_info_->stream_id();
  101. const int64_t hccl_secondary_stream_num = task_info_->hccl_stream_num();
  102. Status ret;
  103. std::lock_guard<std::mutex> lock(model_stream_mapping_mutex_);
  104. if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) {
  105. GELOGI("Need to create map for rt_model_handle_:%p with new mainstream %ld.", rt_model_handle_, master_stream_id);
  106. ret = CreateStream(hccl_secondary_stream_num, master_stream_id);
  107. if (!ret) {
  108. GELOGE(RT_FAILED, "Create hccl stream failed.");
  109. return false;
  110. }
  111. return true;
  112. }
  113. std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map =
  114. model_stream_mapping_.at(rt_model_handle_);
  115. auto iter = master_secondary_stream_map.find(master_stream_id);
  116. if (iter != master_secondary_stream_map.end()) {
  117. std::vector<std::weak_ptr<StreamGuard>> &secondary_stream_vec = iter->second;
  118. auto lock_weak_ptr = [&secondary_stream_vec, this](int64_t index) -> bool {
  119. auto stream = secondary_stream_vec[index].lock();
  120. if (stream == nullptr) {
  121. rtStream_t new_stream = nullptr;
  122. bool ret = CreateStream(rt_model_handle_, &new_stream);
  123. if (!ret) {
  124. GELOGE(FAILED, "CreateStream failed.");
  125. return false;
  126. }
  127. stream = std::make_shared<HcclTask::StreamGuard>(rt_model_handle_, new_stream);
  128. if (stream == nullptr) {
  129. GELOGE(FAILED, "MakeShared failed.");
  130. return false;
  131. }
  132. secondary_stream_vec[index] = stream;
  133. }
  134. secondary_stream_list_.push_back(stream);
  135. return true;
  136. };
  137. if (static_cast<size_t>(hccl_secondary_stream_num) <= secondary_stream_vec.size()) {
  138. GELOGI("Number of secondary stream is enough to be reused.");
  139. for (int64_t i = 0; i < hccl_secondary_stream_num; ++i) {
  140. if (!lock_weak_ptr(i)) {
  141. GELOGE(FAILED, "Lock weak ptr failed.");
  142. return false;
  143. }
  144. }
  145. } else {
  146. GELOGI("Need to reuse secondary stream and create new secondary stream.");
  147. size_t created_stream_num = secondary_stream_vec.size();
  148. for (size_t i = 0; i < secondary_stream_vec.size(); ++i) {
  149. if (!lock_weak_ptr(i)) {
  150. GELOGE(FAILED, "Lock weak ptr failed.");
  151. return false;
  152. }
  153. }
  154. ret = CreateStream(hccl_secondary_stream_num - created_stream_num, master_stream_id);
  155. if (ret != SUCCESS) {
  156. GELOGE(RT_FAILED, "Create hccl stream failed.");
  157. return false;
  158. }
  159. }
  160. GELOGI("Initialize hccl secondary stream success, hccl_secondary_stream_num =%ld", hccl_secondary_stream_num);
  161. } else {
  162. GELOGI("Need to create secondary stream for %s with new mainstream %ld.", task_info_->op_name().c_str(),
  163. master_stream_id);
  164. ret = CreateStream(hccl_secondary_stream_num, master_stream_id);
  165. if (!ret) {
  166. GELOGE(RT_FAILED, "Create hccl stream failed.");
  167. return false;
  168. }
  169. }
  170. return true;
  171. }
  172. bool HcclTask::CreateStream(int64_t stream_num, int64_t master_stream_id) {
  173. GELOGI("Start to create %ld hccl secondary stream.", stream_num);
  174. for (int64_t i = 0; i < stream_num; ++i) {
  175. rtStream_t stream = nullptr;
  176. bool ret = CreateStream(rt_model_handle_, &stream);
  177. if (!ret) {
  178. GELOGE(FAILED, "CreateStream failed.");
  179. return false;
  180. }
  181. GELOGD("hccl_stream addr is=%p", stream);
  182. auto shared_stream = std::make_shared<StreamGuard>(rt_model_handle_, stream);
  183. if (shared_stream == nullptr) {
  184. GELOGE(FAILED, "MakeShared failed.");
  185. return false;
  186. }
  187. SaveHcclSecondaryStream(master_stream_id, shared_stream);
  188. secondary_stream_list_.push_back(shared_stream);
  189. }
  190. GELOGI("CreateStream success.");
  191. return true;
  192. }
  193. bool HcclTask::CreateStream(rtModel_t model, rtStream_t *stream) const {
  194. if (stream == nullptr) {
  195. GELOGE(FAILED, "Output param stream is null.");
  196. return false;
  197. }
  198. rtError_t rt_ret = rtStreamCreateWithFlags(stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
  199. if (rt_ret != RT_ERROR_NONE) {
  200. GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
  201. return false;
  202. }
  203. // Create secondary stream, inactive by default, activated by hccl
  204. rt_ret = rtModelBindStream(model, *stream, RT_MODEL_WAIT_ACTIVE_STREAM);
  205. if (rt_ret != RT_ERROR_NONE) {
  206. GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
  207. return false;
  208. }
  209. return true;
  210. }
  211. void HcclTask::SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream) {
  212. if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) {
  213. model_stream_mapping_.emplace(rt_model_handle_, std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>>());
  214. }
  215. std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map =
  216. model_stream_mapping_.at(rt_model_handle_);
  217. master_secondary_stream_map[master_stream_id].emplace_back(stream);
  218. }
  219. HcclTask::StreamGuard::~StreamGuard() {
  220. rtError_t rt_ret = rtModelUnbindStream(model_, stream_);
  221. if (rt_ret != RT_ERROR_NONE) {
  222. GELOGE(RT_FAILED, "Unbind stream from model failed!");
  223. return;
  224. }
  225. rt_ret = rtStreamDestroy(stream_);
  226. if (rt_ret != RT_ERROR_NONE) {
  227. GELOGE(RT_FAILED, "Destroy stream failed!");
  228. return;
  229. }
  230. }
// Associates TaskInfoType::HCCL with HcclTask / HcclTaskInfo.
// NOTE(review): REGISTER_TASK is defined outside this file (presumably in task_factory.h) - confirm
// what it expands to.
REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo);
  232. } // namespace model_runner
  233. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。