You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

hccl_task.cc 5.9 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "ge_runtime/task/hccl_task.h"
  17. #include "ge_runtime/task/task_factory.h"
  18. #include "common/opskernel/ops_kernel_info_store.h"
  19. #include "common/opskernel/ge_task_info.h"
  20. namespace ge {
  21. namespace model_runner {
  22. HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info)
  23. : TaskRepeater<HcclTaskInfo>(model_context, task_info),
  24. task_info_(task_info),
  25. stream_(nullptr),
  26. rt_model_handle_(nullptr),
  27. priority_(0),
  28. slave_stream_list_(),
  29. hcom_bind_model_(nullptr),
  30. hcom_unbind_model_(nullptr),
  31. hcom_distribute_task_(nullptr) {
  32. if (task_info_ == nullptr) {
  33. GELOGW("task_info_ is null!");
  34. }
  35. hcom_bind_model_ = task_info->hcom_bind_model();
  36. hcom_unbind_model_ = task_info->hcom_unbind_model();
  37. priority_ = model_context.priority();
  38. rt_model_handle_ = model_context.rt_model_handle();
  39. auto stream_list = model_context.stream_list();
  40. if (hcom_bind_model_ != nullptr) {
  41. if (rt_model_handle_list_.insert(rt_model_handle_).second) {
  42. for (auto stream : stream_list) {
  43. (void)hcom_bind_model_(rt_model_handle_, stream);
  44. }
  45. }
  46. }
  47. if (stream_list.size() == 1) {
  48. stream_ = stream_list[0];
  49. } else if (stream_list.size() > task_info->stream_id()) {
  50. stream_ = stream_list[task_info->stream_id()];
  51. } else {
  52. GELOGW("index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size());
  53. }
  54. }
  55. HcclTask::~HcclTask() {
  56. for (size_t i = 0; i < slave_stream_list_.size(); ++i) {
  57. rtError_t rt_ret = rtModelUnbindStream(rt_model_handle_, slave_stream_list_[i]);
  58. if (rt_ret != RT_ERROR_NONE) {
  59. GELOGE(RT_FAILED, "Unbind stream from model failed! Index: %zu", i);
  60. }
  61. }
  62. for (size_t i = 0; i < slave_stream_list_.size(); ++i) {
  63. rtError_t rt_ret = rtStreamDestroy(slave_stream_list_[i]);
  64. if (rt_ret != RT_ERROR_NONE) {
  65. GELOGE(RT_FAILED, "Destroy stream failed! Index: %zu", i);
  66. }
  67. }
  68. if (hcom_unbind_model_ != nullptr) {
  69. if (rt_model_handle_list_.find(rt_model_handle_) != rt_model_handle_list_.end()) {
  70. (void)hcom_unbind_model_(rt_model_handle_);
  71. (void)rt_model_handle_list_.erase(rt_model_handle_);
  72. }
  73. }
  74. }
/**
 * @brief Deliver the HCCL task for execution.
 *
 * Two delivery paths:
 *  1. If the task info supplies a distribute function pointer, call it
 *     directly with this task's stream and return its result.
 *  2. Otherwise build a GETaskInfo — creating the persistent slave streams
 *     HCCL needs and binding them to the model — and hand it to the ops
 *     kernel info store's LoadTask.
 *
 * @return true on success; false if neither delivery path is available or
 *         any runtime/HCCL call fails.
 */
bool HcclTask::Distribute() {
  // No ops kernel info store
  hcom_distribute_task_ = task_info_->hcom_distribute_task();
  if (hcom_distribute_task_ != nullptr) {
    return hcom_distribute_task_(task_info_, stream_);
  }
  // Ops kernel info store
  // Get privateDef and opsKernelStorePtr
  GELOGI("get custom info in modelTaskDef");
  void *ops_kernel_store = task_info_->ops_kernel_store();
  OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store);
  if (ops_kernel_store == nullptr) {
    GELOGE(PARAM_INVALID, "No hcom distribute function ptr and no ops kernel store.");
    return false;
  }
  // private_def is the serialized HCCL-specific payload; const is cast away
  // because the GETaskInfo interface takes a mutable char*.
  char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data()));
  auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size());
  GELOGI("the first address of the custom info, privateDef=%p", private_def);
  GELOGI("hcclStreamNum =%ld", task_info_->hccl_stream_num());
  // Create one persistent slave stream per requested HCCL stream and bind it
  // to the model. Streams pushed to slave_stream_list_ (including partial
  // progress on failure) are unbound and destroyed by the destructor.
  for (int64_t i = 0; i < task_info_->hccl_stream_num(); ++i) {
    rtStream_t stream = nullptr;
    rtError_t rt_ret = rtStreamCreateWithFlags(&stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      return false;
    }
    rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
      // The just-created stream was never recorded, so release it here.
      (void)rtStreamDestroy(stream);
      return false;
    }
    GELOGI("hccl_stream addr is=%p", stream);
    slave_stream_list_.push_back(stream);
  }
  GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
  // Fill the HCCL kernel descriptor consumed by LoadTask.
  // NOTE(review): kernelHcclInfo[0] is indexed without a visible size check —
  // presumably GETaskInfo guarantees at least one element; confirm against
  // common/opskernel/ge_task_info.h.
  GETaskInfo ge_task;
  ge_task.id = 0;
  ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL);
  ge_task.stream = stream_;
  ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type();
  ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr();
  ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr();
  ge_task.kernelHcclInfo[0].workSpaceAddr = task_info_->workspace_addr();
  ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size();
  ge_task.kernelHcclInfo[0].count = task_info_->count();
  ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type());
  ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type());
  ge_task.kernelHcclInfo[0].rootId = task_info_->root_id();
  ge_task.kernelHcclInfo[0].hcclStreamList = slave_stream_list_;
  ge_task.privateDef = private_def;
  ge_task.privateDefLen = private_def_len;
  ge_task.opsKernelStorePtr = ops_kernel_store;
  auto result = ops_kernel_info_store->LoadTask(ge_task);
  // tagHcclResult::HCCL_SUCCESS is 0
  if (result != 0) {
    GELOGE(INTERNAL_ERROR, "davinci_model : load task fail, return ret: %u", result);
    return false;
  }
  GELOGI("call function LoadTask end.");
  return true;
}
// Register HcclTask with the task factory so entries of type
// TaskInfoType::HCCL are instantiated as HcclTask from an HcclTaskInfo.
REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo);
  138. } // namespace model_runner
  139. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示