You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

tbe_task_builder.cc 12 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago

  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "single_op/task/tbe_task_builder.h"
  17. #include <mutex>
  18. #include <vector>
  19. #include "graph/debug/ge_attr_define.h"
  20. #include "graph/load/new_model_manager/model_utils.h"
  21. #include "graph/manager/graph_var_manager.h"
  22. #include "runtime/rt.h"
  23. #include "single_op/task/build_task_utils.h"
  24. namespace ge {
  25. namespace {
  // Name of the boolean op attribute that SetKernelArgs reads to choose the tiling (dynamic) path.
  constexpr const char *kAttrSupportDynamicShape = "support_dynamicshape";
  // Name of the integer op attribute that InitTilingInfo reads as the tiling buffer size.
  constexpr const char *kAttrOpParamSize = "op_para_size";
  // Locked by TbeTaskBuilder::RegisterKernel while it registers a kernel binary.
  std::mutex g_reg_mutex;
  29. inline void GetKernelName(const OpDescPtr &op_desc, std::string &kernel_name) {
  30. (void)AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name);
  31. }
  32. inline TBEKernelPtr GetTbeKernel(const OpDescPtr &op_desc) {
  33. return op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  34. }
  35. } // namespace
  36. KernelHolder::KernelHolder(const char *stub_func, std::shared_ptr<ge::OpKernelBin> kernel_bin)
  37. : stub_func_(stub_func), bin_handle_(nullptr), kernel_bin_(std::move(kernel_bin)) {}
  38. KernelHolder::~KernelHolder() {
  39. if (bin_handle_ != nullptr) {
  40. GE_CHK_RT(rtDevBinaryUnRegister(bin_handle_));
  41. }
  42. }
  43. const char *KernelBinRegistry::GetUnique(const string &stub_func) {
  44. std::lock_guard<std::mutex> lock(mutex_);
  45. auto it = unique_stubs_.find(stub_func);
  46. if (it != unique_stubs_.end()) {
  47. return it->c_str();
  48. } else {
  49. it = unique_stubs_.insert(unique_stubs_.end(), stub_func);
  50. return it->c_str();
  51. }
  52. }
  53. const char *KernelBinRegistry::GetStubFunc(const std::string &stub_name) {
  54. std::lock_guard<std::mutex> lock(mutex_);
  55. auto iter = registered_bins_.find(stub_name);
  56. if (iter != registered_bins_.end()) {
  57. return iter->second->stub_func_;
  58. }
  59. return nullptr;
  60. }
  61. bool KernelBinRegistry::AddKernel(const std::string &stub_name, std::unique_ptr<KernelHolder> &&holder) {
  62. std::lock_guard<std::mutex> lock(mutex_);
  63. auto ret = registered_bins_.emplace(stub_name, std::move(holder));
  64. return ret.second;
  65. }
  66. TbeTaskBuilder::TbeTaskBuilder(const std::string &model_name, const NodePtr &node, const domi::KernelDef &kernel_def)
  67. : node_(node),
  68. op_desc_(node->GetOpDesc()),
  69. kernel_def_(kernel_def),
  70. stub_name_(model_name + "/" + node->GetName() + "_tvmbin") {}
  71. Status TbeTaskBuilder::DoRegisterBinary(const OpKernelBin &kernel_bin, void **bin_handle,
  72. const SingleOpModelParam &param) const {
  73. rtDevBinary_t binary;
  74. binary.version = 0;
  75. binary.data = kernel_bin.GetBinData();
  76. binary.length = kernel_bin.GetBinDataSize();
  77. binary.magic = param.core_type == 0 ? RT_DEV_BINARY_MAGIC_ELF : RT_DEV_BINARY_MAGIC_ELF_AIVEC;
  78. auto ret = rtDevBinaryRegister(&binary, bin_handle);
  79. if (ret != RT_ERROR_NONE) {
  80. GELOGE(ret, "rtDevBinaryRegister failed, bin key = %s, core_type = %ld, rt ret = %d", stub_name_.c_str(),
  81. param.core_type, static_cast<int>(ret));
  82. return ret;
  83. }
  84. return SUCCESS;
  85. }
  86. Status TbeTaskBuilder::DoRegisterMeta(void *bin_handle) {
  87. std::string meta_data;
  88. (void)AttrUtils::GetStr(op_desc_, TVM_ATTR_NAME_METADATA, meta_data);
  89. GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
  90. if (!meta_data.empty()) {
  91. auto rt_ret = rtMetadataRegister(bin_handle, meta_data.c_str());
  92. if (rt_ret != RT_ERROR_NONE) {
  93. GELOGE(rt_ret, "rtMetadataRegister failed. bin key = %s, meta_data = %s, rt ret = %d", stub_name_.c_str(),
  94. meta_data.c_str(), static_cast<int>(rt_ret));
  95. return rt_ret;
  96. }
  97. }
  98. return SUCCESS;
  99. }
  100. Status TbeTaskBuilder::DoRegisterFunction(void *bin_handle, const char *stub_name, const char *kernel_name) {
  101. auto rt_ret = rtFunctionRegister(bin_handle, stub_name, stub_name, kernel_name, FUNC_MODE_NORMAL);
  102. if (rt_ret != RT_ERROR_NONE) {
  103. GELOGE(rt_ret, "rtFunctionRegister failed. bin key = %s, kernel name = %s, rt ret = %d", stub_name, kernel_name,
  104. static_cast<int>(rt_ret));
  105. return rt_ret;
  106. }
  107. return SUCCESS;
  108. }
  109. Status TbeTaskBuilder::DoRegisterKernel(const ge::OpKernelBin &tbe_kernel, const char *bin_file_key, void **bin_handle,
  110. const SingleOpModelParam &param) {
  111. std::string kernel_name;
  112. GetKernelName(op_desc_, kernel_name);
  113. void *handle = nullptr;
  114. auto ret = DoRegisterBinary(tbe_kernel, &handle, param);
  115. if (ret != SUCCESS) {
  116. return ret;
  117. }
  118. ret = DoRegisterMeta(handle);
  119. if (ret != SUCCESS) {
  120. GE_CHK_RT(rtDevBinaryUnRegister(handle));
  121. return ret;
  122. }
  123. ret = DoRegisterFunction(handle, bin_file_key, kernel_name.c_str());
  124. if (ret != SUCCESS) {
  125. GE_CHK_RT(rtDevBinaryUnRegister(handle));
  126. return ret;
  127. }
  128. GELOGI("Register function succeeded: kernel_name = %s", kernel_name.c_str());
  129. *bin_handle = handle;
  130. return SUCCESS;
  131. }
  132. Status TbeTaskBuilder::RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param) {
  133. KernelBinRegistry &registry = KernelBinRegistry::GetInstance();
  134. // check if already registered
  135. const char *stub_func = registry.GetStubFunc(stub_name_);
  136. if (stub_func != nullptr) {
  137. task.SetStubFunc(stub_name_, stub_func);
  138. return SUCCESS;
  139. }
  140. // to avoid repeat register
  141. std::lock_guard<std::mutex> lock(g_reg_mutex);
  142. // check again
  143. stub_func = registry.GetStubFunc(stub_name_);
  144. if (stub_func == nullptr) {
  145. stub_func = registry.GetUnique(stub_name_);
  146. GELOGI("RegisterKernel begin, stub_func = %s", stub_func);
  147. auto tbe_kernel = GetTbeKernel(op_desc_);
  148. if (tbe_kernel == nullptr) {
  149. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "OP EXT ATTR NAME TBE_KERNEL not found. op = %s", op_desc_->GetName().c_str());
  150. return ACL_ERROR_GE_INTERNAL_ERROR;
  151. }
  152. auto holder = std::unique_ptr<KernelHolder>(new (std::nothrow) KernelHolder(stub_func, tbe_kernel));
  153. if (holder == nullptr) {
  154. GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create KernelHodler failed.");
  155. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  156. }
  157. void *bin_handle = nullptr;
  158. auto ret = DoRegisterKernel(*tbe_kernel, stub_func, &bin_handle, param);
  159. if (ret == SUCCESS) {
  160. holder->SetBinHandle(bin_handle);
  161. if (!registry.AddKernel(stub_name_, std::move(holder))) {
  162. // should not happen. only one thread can reach here
  163. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add kernel failed. stub name = %s", stub_name_.c_str());
  164. return ACL_ERROR_GE_INTERNAL_ERROR;
  165. }
  166. }
  167. }
  168. task.SetStubFunc(stub_name_, stub_func);
  169. return SUCCESS;
  170. }
  171. Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const {
  172. const std::string &sm_desc_str = kernel_def_.sm_desc();
  173. if (sm_desc_str.empty()) {
  174. *sm_desc = nullptr;
  175. } else {
  176. GELOGD("To process sm desc, size = %zu", sm_desc_str.size());
  177. char *sm_control = const_cast<char *>(sm_desc_str.data());
  178. auto *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control);
  179. uint64_t gen_base_addr = param.base_addr;
  180. // There is no weight for te op now. Update L2_mirror_addr by data memory base.
  181. uint64_t data_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(param.mem_base)) - gen_base_addr;
  182. for (auto &data_index : l2_ctrl_info->data) {
  183. if (data_index.L2_mirror_addr != 0) {
  184. data_index.L2_mirror_addr += data_base_addr;
  185. }
  186. }
  187. auto rtRet = rtMemAllocManaged(sm_desc, sm_desc_str.size(), RT_MEMORY_SPM);
  188. if (rtRet != RT_ERROR_NONE) {
  189. GELOGE(rtRet, "rtMemAllocManaged failed, ret: %d", static_cast<int>(rtRet));
  190. return rtRet;
  191. }
  192. rtRet = rtMemcpy(*sm_desc, sm_desc_str.size(), sm_desc_str.data(), sm_desc_str.size(), RT_MEMCPY_HOST_TO_DEVICE);
  193. if (rtRet != RT_ERROR_NONE) {
  194. (void)rtMemFreeManaged(*sm_desc);
  195. GELOGE(rtRet, "rtMemcpy, ret: %d", static_cast<int>(rtRet));
  196. return rtRet;
  197. }
  198. }
  199. return SUCCESS;
  200. }
  201. Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc) {
  202. size_t arg_size = kernel_def_.args_size();
  203. auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
  204. GE_CHECK_NOTNULL(args);
  205. auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
  206. if (rtRet != RT_ERROR_NONE) {
  207. GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet));
  208. return rtRet;
  209. }
  210. const domi::KernelContext &context = kernel_def_.context();
  211. const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
  212. uint16_t offset = *args_offset_tmp;
  213. bool is_dynamic = false;
  214. (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
  215. if (is_dynamic) {
  216. GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
  217. } else {
  218. // copy args
  219. std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
  220. void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
  221. uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
  222. rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
  223. if (rtRet != RT_ERROR_NONE) {
  224. GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet));
  225. return rtRet;
  226. }
  227. }
  228. task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);
  229. return SUCCESS;
  230. }
  231. Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
  232. GELOGD("Build tbe task begin");
  233. auto ret = SetKernelArgs(task, param, op_desc_);
  234. if (ret != SUCCESS) {
  235. return ret;
  236. }
  237. ret = RegisterKernel(task, param);
  238. if (ret != SUCCESS) {
  239. return ret;
  240. }
  241. auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_);
  242. GELOGI("[TASK_INFO] %s %s", stub_name_.c_str(), task_info.c_str());
  243. void *stub_func = nullptr;
  244. auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func);
  245. if (rtRet != SUCCESS) {
  246. GELOGE(rtRet, "rtGetFunctionByName failed.");
  247. return rtRet;
  248. }
  249. task.SetStubFunc(stub_name_, stub_func);
  250. return SUCCESS;
  251. }
  252. Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
  253. GELOGD("Start alloc tiling data of node %s.", op_desc_->GetName().c_str());
  254. int64_t max_size = -1;
  255. (void)AttrUtils::GetInt(op_desc_, kAttrOpParamSize, max_size);
  256. GELOGD("Got op param size by key: %s, ret = %ld", kAttrOpParamSize, max_size);
  257. if (max_size <= 0) {
  258. GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc_->GetName().c_str(), max_size);
  259. return ACL_ERROR_GE_PARAM_INVALID;
  260. }
  261. void *tiling_buffer = nullptr;
  262. GE_CHK_RT_RET(rtMalloc(&tiling_buffer, static_cast<uint64_t>(max_size), RT_MEMORY_HBM));
  263. GE_CHECK_NOTNULL(tiling_buffer);
  264. GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
  265. task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
  266. return SUCCESS;
  267. }
  268. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用,用户对此并无感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。