You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

single_op_model.cc 33 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "single_op/single_op_model.h"
  17. #include <atomic>
  18. #include <memory>
  19. #include <string>
  20. #include <vector>
  21. #include "framework/common/debug/ge_log.h"
  22. #include "graph/debug/ge_attr_define.h"
  23. #include "graph/load/model_manager/model_utils.h"
  24. #include "graph/utils/attr_utils.h"
  25. #include "graph/utils/graph_utils.h"
  26. #include "graph/utils/tensor_utils.h"
  27. #include "runtime/rt.h"
  28. #include "single_op/task/aicpu_task_builder.h"
  29. #include "single_op/task/aicpu_kernel_task_builder.h"
  30. #include "single_op/task/rts_kernel_task_builder.h"
  31. #include "single_op/task/tbe_task_builder.h"
  32. #include "hybrid/executor/hybrid_model_executor.h"
  33. #include "hybrid/node_executor/node_executor.h"
  34. static std::atomic<std::uint64_t> aicpu_kernel_id(0);
  35. using domi::TaskDef;
  36. using std::unique_ptr;
  37. using std::vector;
  38. namespace ge {
  39. namespace {
const size_t kDataOutputNum = 1;      // a Data op must have exactly one output
const uint32_t kInputIndexOfData = 0;
const uint32_t kOutputIndexOfData = 0;
// A TBE node's task list may carry one extra (leading) AtomicAddrClean task.
const size_t kNumTaskWithAtomicAddrCleanTask = 2;
// An AICPU_TF node with DEPEND_COMPUTE carries an extra memcpy task.
const size_t kNumTaskWithMemCpyTask = 2;
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
// Kernel-lib names used to dispatch dynamic-op task building.
const char *const kEngineNameAiCore = "AIcoreEngine";
const char *const kEngineNameAiCpu = "aicpu_ascend_kernel";
const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel";
  49. Status CheckHostMem(const std::vector<string> &dependencies, const NodePtr &node, bool &is_host_mem) {
  50. auto op_desc = node->GetOpDesc();
  51. for (const auto &input_name : dependencies) {
  52. int input_index = op_desc->GetInputIndexByName(input_name);
  53. if (input_index < 0) {
  54. GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.",
  55. node->GetName().c_str(), input_name.c_str());
  56. REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed, node:[%s] inputname: %s.",
  57. node->GetName().c_str(), input_name.c_str());
  58. return INTERNAL_ERROR;
  59. }
  60. const auto &src_node = NodeUtils::GetInDataNodeByIndex(*node, input_index);
  61. GE_CHECK_NOTNULL(src_node);
  62. auto src_op_desc = src_node->GetOpDesc();
  63. GE_CHECK_NOTNULL(src_op_desc);
  64. if (src_op_desc->GetType() == DATA) {
  65. auto tensor = src_op_desc->MutableInputDesc(kInputIndexOfData);
  66. if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
  67. GELOGD("Get hostmem from node %s, inputname: %s.", src_node->GetName().c_str(), input_name.c_str());
  68. continue;
  69. }
  70. }
  71. is_host_mem = false;
  72. return SUCCESS;
  73. }
  74. is_host_mem = true;
  75. return SUCCESS;
  76. }
  77. Status CheckInferDepend(GeModelPtr &ge_model, bool &is_infer_depend, bool &is_host_mem) {
  78. auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  79. GE_CHECK_NOTNULL(comp_graph);
  80. for (const auto &node : comp_graph->GetAllNodes()) {
  81. GE_CHECK_NOTNULL(node);
  82. auto op_desc = node->GetOpDesc();
  83. GE_CHECK_NOTNULL(op_desc);
  84. const auto &depends = op_desc->GetOpInferDepends();
  85. bool support_dynamic_shape = false;
  86. (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
  87. if (!depends.empty() && support_dynamic_shape) {
  88. is_infer_depend = true;
  89. return CheckHostMem(depends, node, is_host_mem);
  90. }
  91. }
  92. return SUCCESS;
  93. }
  94. } // namespace
// Stores only a pointer/size view of the caller-owned serialized model
// buffer; the buffer must outlive this SingleOpModel instance.
SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size)
    : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {}
  97. Status SingleOpModel::Init() {
  98. GE_CHK_STATUS_RET_NOLOG(InitModel());
  99. return LoadAllNodes();
  100. }
  101. Status SingleOpModel::InitModel() {
  102. ge::ModelData model;
  103. model.model_len = ori_model_size_;
  104. model.model_data = const_cast<void *>(ori_model_data_);
  105. auto ret = model_helper_.LoadModel(model);
  106. if (ret != SUCCESS) {
  107. GELOGE(ret, "[Load][Model] failed.");
  108. REPORT_CALL_ERROR("E19999", "InitModel fail for ModelHelper LoadModel failed.");
  109. return ret;
  110. }
  111. return SUCCESS;
  112. }
  113. void SingleOpModel::ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param) {
  114. int64_t value = 0;
  115. bool ret = false;
  116. std::shared_ptr<ge::GeModel> model = model_helper.GetGeModel();
  117. GE_CHECK_NOTNULL_JUST_RETURN(model);
  118. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_MEMORY_SIZE, value);
  119. param.memory_size = ret ? static_cast<uint64_t>(value) : 0;
  120. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_ZERO_COPY_MEMORY_SIZE, value);
  121. param.zero_copy_mem_size = ret ? static_cast<uint64_t>(value) : 0;
  122. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_WEIGHT_SIZE, value);
  123. param.weight_size = ret ? static_cast<uint64_t>(value) : 0;
  124. ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_BASE_ADDR, value);
  125. param.base_addr = ret ? static_cast<uint64_t>(value) : 0;
  126. ret = ge::AttrUtils::GetInt(model, MODEL_ATTR_TASK_GEN_WEIGHT_ADDR, value);
  127. param.weight_addr = ret ? static_cast<uint64_t>(value) : 0;
  128. ret = ge::AttrUtils::GetInt(model, ATTR_MODEL_CORE_TYPE, value);
  129. param.core_type = ret ? value : 0;
  130. GELOGI("ParseOpModelParams(), total_memory_size:%lu, zero_copy_size:%lu, weight_size:%lu, core_type = %lu",
  131. param.memory_size, param.zero_copy_mem_size, param.weight_size, param.core_type);
  132. }
  133. Status SingleOpModel::InitModelMem(StreamResource &res) {
  134. ParseOpModelParams(model_helper_, model_params_);
  135. if (model_params_.memory_size > model_params_.zero_copy_mem_size) {
  136. const string purpose("malloc feature map memory on model execute.");
  137. GELOGI("total memory: %lu, zero_copy_mem: %lu", model_params_.memory_size, model_params_.zero_copy_mem_size);
  138. model_params_.mem_base =
  139. res.MallocMemory(purpose, model_params_.memory_size - model_params_.zero_copy_mem_size, false);
  140. if (model_params_.mem_base == nullptr) {
  141. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  142. }
  143. }
  144. if (model_params_.weight_size > 0 && has_weight_) {
  145. const string purpose("malloc weights memory on model execute.");
  146. model_params_.weight_base = res.MallocWeight(purpose, model_params_.weight_size);
  147. if (model_params_.weight_base == nullptr) {
  148. // no need to free memory, for that was handled by StreamResources
  149. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  150. }
  151. auto weight_buffer = model_helper_.GetGeModel()->GetWeight();
  152. GELOGI("To copy weight to device. weight size = %zu", weight_buffer.GetSize());
  153. GE_CHK_RT_RET(rtMemcpy(model_params_.weight_base,
  154. model_params_.weight_size,
  155. weight_buffer.GetData(),
  156. weight_buffer.GetSize(),
  157. RT_MEMCPY_HOST_TO_DEVICE));
  158. }
  159. return SUCCESS;
  160. }
  161. Status SingleOpModel::ParseInputNode(const OpDescPtr &op_desc) {
  162. vector<int64_t> offsets = op_desc->GetOutputOffset();
  163. if (offsets.size() != kDataOutputNum) {
  164. GELOGE(ACL_ERROR_GE_PARAM_INVALID,
  165. "[Parse][InputNode]Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
  166. op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
  167. REPORT_INNER_ERROR("E19999", "ParseInputNode fail for Data op should have only one output, but got %zu,"
  168. "op_name:%s, op_type:%s.", op_desc->GetOutputOffset().size(),
  169. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  170. return ACL_ERROR_GE_PARAM_INVALID;
  171. }
  172. auto output_desc = op_desc->GetOutputDescPtr(0);
  173. GE_CHECK_NOTNULL(output_desc);
  174. int64_t tensor_size = 0;
  175. (void)TensorUtils::GetSize(*output_desc, tensor_size);
  176. input_offset_list_.emplace_back(offsets[0]);
  177. input_sizes_.emplace_back(tensor_size);
  178. GELOGI("[%s] parse input node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
  179. tensor_size, static_cast<uint32_t>(offsets[0]));
  180. return SUCCESS;
  181. }
  182. void SingleOpModel::ParseOutputNode(const OpDescPtr &op_desc) {
  183. vector<int64_t> offsets = op_desc->GetInputOffset();
  184. for (uint32_t k = 0; k < static_cast<uint32_t>(offsets.size()); ++k) {
  185. auto input_desc = op_desc->GetInputDescPtr(k);
  186. if (input_desc == nullptr) {
  187. continue;
  188. }
  189. int64_t tensor_size = 0;
  190. (void)TensorUtils::GetSize(*input_desc, tensor_size);
  191. output_offset_list_.emplace_back(offsets[k]);
  192. output_sizes_.emplace_back(tensor_size);
  193. GELOGI("[%s] parse output node: %s, size = %ld, offset = %u", model_name_.c_str(), op_desc->GetName().c_str(),
  194. tensor_size, static_cast<uint32_t>(offsets[k]));
  195. }
  196. }
// Indexes every direct node of the model graph by its op id (op_list_) and
// records the special nodes needed by later build steps: Data/AippData inputs
// (data_ops_, plus op_with_hostmem_ for host-backed inputs), whether any
// constant exists (has_weight_), and the NETOUTPUT op. For all remaining
// compute ops, the kernel binaries stored in the model are attached to the
// op desc.
Status SingleOpModel::LoadAllNodes() {
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  Graph graph = ge_model->GetGraph();
  model_id_ = ge_model->GetModelId();
  auto compute_graph = GraphUtils::GetComputeGraph(graph);
  if (compute_graph == nullptr) {
    GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][ComputeGraph] fail, model_name:%s.", model_name_.c_str());
    REPORT_CALL_ERROR("E19999", "LoadAllNodes fail for GetComputeGraph return nullptr, model_name:%s.",
                      model_name_.c_str());
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  auto nodes = compute_graph->GetDirectNode();
  size_t model_op_size = nodes.size();
  GELOGI("[%s] node size = %zu", model_name_.c_str(), model_op_size);
  for (size_t i = 0; i < model_op_size; ++i) {
    auto node = nodes.at(i);
    auto op_desc = node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    // Task defs reference ops by index/id; keep a lookup table.
    op_list_[op_desc->GetId()] = node;
    auto op_type = op_desc->GetType();
    GELOGI("[%s] node[%zu] = %s, type = %s", model_name_.c_str(), i, node->GetName().c_str(), op_type.c_str());
    if (op_type == DATA_TYPE || op_type == AIPP_DATA_TYPE) {
      data_ops_.emplace_back(op_desc);
      // A Data node whose input tensor carries ATTR_NAME_VALUE is backed by
      // host memory; remember it keyed by its user-facing input index.
      auto tensor = op_desc->MutableInputDesc(0);
      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
        int32_t index = 0;
        (void) AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index);
        GELOGD("Node %s, index %d, has host mem.", node->GetName().c_str(), index);
        op_with_hostmem_[index] = node;
      }
      continue;
    }
    if (op_type == CONSTANT || op_type == CONSTANTOP) {
      // Constants imply weight memory must be allocated in InitModelMem.
      has_weight_ = true;
      continue;
    }
    if (op_type == NETOUTPUT) {
      netoutput_op_ = op_desc;
      continue;
    }
    // Compute op: attach its TBE / custom-AICPU kernel binaries.
    ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(op_desc);
    ge_model->GetCustAICPUKernelStore().LoadCustAICPUKernelBinToOpDesc(op_desc);
  }
  return SUCCESS;
}
  243. Status SingleOpModel::ParseInputsAndOutputs() {
  244. for (auto &op_desc : data_ops_) {
  245. GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc));
  246. }
  247. if (netoutput_op_ != nullptr) {
  248. ParseOutputNode(netoutput_op_);
  249. }
  250. return SUCCESS;
  251. }
  252. Status SingleOpModel::SetInputsAndOutputs(SingleOp &single_op) {
  253. int arg_index = 0;
  254. for (size_t i = 0; i < input_offset_list_.size(); ++i) {
  255. auto *addr = model_params_.mem_base + input_offset_list_[i];
  256. model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
  257. single_op.input_sizes_.emplace_back(input_sizes_[i]);
  258. single_op.input_addr_list_.emplace_back(addr);
  259. }
  260. for (size_t i = 0; i < output_offset_list_.size(); ++i) {
  261. auto *addr = model_params_.mem_base + output_offset_list_[i];
  262. model_params_.addr_mapping_.emplace(reinterpret_cast<uintptr_t>(addr), arg_index++);
  263. single_op.output_sizes_.emplace_back(output_sizes_[i]);
  264. single_op.output_addr_list_.emplace_back(addr);
  265. }
  266. single_op.args_.resize(arg_index);
  267. return SUCCESS;
  268. }
  269. Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &single_op) {
  270. auto ge_model = model_helper_.GetGeModel();
  271. GE_CHECK_NOTNULL(ge_model);
  272. single_op.arg_table_.resize(single_op.input_sizes_.size() + single_op.output_sizes_.size());
  273. auto tasks = ge_model->GetModelTaskDefPtr()->task();
  274. for (int i = 0; i < tasks.size(); ++i) {
  275. const TaskDef &task_def = tasks[i];
  276. GELOGI("[%s] Task[%d], type = %u, DebugString = %s", model_name_.c_str(), i, task_def.type(),
  277. task_def.DebugString().c_str());
  278. auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  279. if (task_type == RT_MODEL_TASK_KERNEL || task_type == RT_MODEL_TASK_ALL_KERNEL) {
  280. const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
  281. task_def.kernel_with_handle().context();
  282. auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
  283. if (kernel_type == ccKernelType::TE) {
  284. GELOGD("Building TBE task");
  285. TbeOpTask *tbe_task = nullptr;
  286. auto ret = BuildKernelTask(task_def, &tbe_task);
  287. if (ret != SUCCESS) {
  288. return ret;
  289. }
  290. ParseArgTable(tbe_task, single_op);
  291. tbe_task->SetModelArgs(model_name_, model_id_);
  292. if (tbe_task->tiling_buffer_ != nullptr) {
  293. tbe_task->stream_resource_ = stream_resource;
  294. }
  295. single_op.tasks_.emplace_back(tbe_task);
  296. } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
  297. GELOGD("Building AICPU_CC task");
  298. OpTask *task = nullptr;
  299. uint64_t singleop_kernel_id = aicpu_kernel_id++;
  300. GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id);
  301. GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id));
  302. task->SetModelArgs(model_name_, model_id_);
  303. ParseArgTable(task, single_op);
  304. single_op.tasks_.emplace_back(task);
  305. } else {
  306. GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
  307. "[Check][KernelType]Only TBE, AI_CPU, CUST_AI_CPU kernel are supported, but got %u",
  308. context.kernel_type());
  309. REPORT_INNER_ERROR("E19999",
  310. "BuildTaskList fail for %u not supported, Only TBE, AI_CPU, CUST_AI_CPU kernel are supported.",
  311. context.kernel_type());
  312. return ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID;
  313. }
  314. } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
  315. GELOGD("Building AICPU_TF task");
  316. AiCpuTask *aicpu_task = nullptr;
  317. uint64_t singleop_kernel_id = aicpu_kernel_id++;
  318. GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
  319. GE_CHK_STATUS_RET_NOLOG(
  320. BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, singleop_kernel_id));
  321. aicpu_task->SetModelArgs(model_name_, model_id_);
  322. ParseArgTable(aicpu_task, single_op);
  323. single_op.tasks_.emplace_back(aicpu_task);
  324. } else if ((task_type == RT_MODEL_TASK_MEMCPY_ASYNC) || (task_type == RT_MODEL_TASK_MEMCPY_ADDR_ASYNC)) {
  325. auto kernel_def = task_def.memcpy_async();
  326. auto node = op_list_[kernel_def.op_index()];
  327. GE_CHECK_NOTNULL(node);
  328. auto op_desc = node->GetOpDesc();
  329. GE_CHECK_NOTNULL(op_desc);
  330. std::unique_ptr<MemcpyAsyncTask> task;
  331. GE_CHK_STATUS_RET_NOLOG(RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, model_params_, task));
  332. task->SetModelArgs(model_name_, model_id_);
  333. ParseArgTable(task.get(), single_op);
  334. single_op.tasks_.emplace_back(task.release());
  335. } else {
  336. // skip
  337. GELOGD("Skip task type: %d", static_cast<int>(task_type));
  338. }
  339. }
  340. return SUCCESS;
  341. }
  342. void SingleOpModel::ParseArgTable(OpTask *task, SingleOp &op) {
  343. if (task == nullptr) {
  344. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Parse][ArgTable] fail for input OpTask is nullptr.");
  345. REPORT_INNER_ERROR("E19999", "ParseArgTable fail for input OpTask is nullptr.");
  346. return;
  347. }
  348. // args: addr1, addr2, addr3 ...
  349. uintptr_t *arg_base = nullptr;
  350. size_t arg_num = 0;
  351. task->GetIoAddr(arg_base, arg_num);
  352. for (size_t i = 0; i < arg_num; ++i) {
  353. uintptr_t *ptr_to_addr = arg_base + i;
  354. uintptr_t addr = *ptr_to_addr;
  355. auto iter = model_params_.addr_mapping_.find(addr);
  356. if (iter != model_params_.addr_mapping_.end()) {
  357. int arg_index = iter->second;
  358. GELOGI("%s args[%zu] mapped to user designated args[%d]", task->GetOpdesc()->GetName().c_str(), i, arg_index);
  359. op.arg_table_[iter->second].emplace_back(ptr_to_addr);
  360. }
  361. }
  362. }
  363. Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task) {
  364. GE_CHECK_NOTNULL(task);
  365. auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  366. const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
  367. task_def.kernel_with_handle().context();
  368. auto iter = op_list_.find(context.op_index());
  369. if (iter == op_list_.end()) {
  370. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. op index = %u", context.op_index());
  371. REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index());
  372. return ACL_ERROR_GE_INTERNAL_ERROR;
  373. }
  374. std::unique_ptr<TbeOpTask> tbe_task(new (std::nothrow) TbeOpTask());
  375. if (tbe_task == nullptr) {
  376. GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][TbeOpTask]failed.");
  377. REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new TbeOpTask.");
  378. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  379. }
  380. auto builder = TbeTaskBuilder(model_name_, iter->second, task_def);
  381. auto ret = builder.BuildTask(*tbe_task, model_params_);
  382. if (ret != SUCCESS) {
  383. GELOGE(ret, "[Build][TbeOpTask]failed.");
  384. REPORT_INNER_ERROR("E19999", "[Build][TbeOpTask]failed.");
  385. return ret;
  386. }
  387. *task = tbe_task.release();
  388. return SUCCESS;
  389. }
  390. Status SingleOpModel::BuildAtomicTask(const domi::TaskDef &task_def, AtomicAddrCleanOpTask **task) {
  391. GE_CHECK_NOTNULL(task);
  392. const auto &context = task_def.kernel().context();
  393. auto iter = op_list_.find(context.op_index());
  394. if (iter == op_list_.end()) {
  395. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Param:TaskDef]op desc not found. op index = %u", context.op_index());
  396. REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for op desc not found. op index = %u", context.op_index());
  397. return ACL_ERROR_GE_INTERNAL_ERROR;
  398. }
  399. std::unique_ptr<AtomicAddrCleanOpTask> atomic_task(new (std::nothrow) AtomicAddrCleanOpTask());
  400. if (atomic_task == nullptr) {
  401. GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AtomicAddrCleanOpTask]failed.");
  402. REPORT_INNER_ERROR("E19999", "BuildKernelTask fail for new AtomicAddrCleanOpTask.");
  403. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  404. }
  405. auto builder = AtomicAddrCleanTaskBuilder(model_name_, iter->second, task_def);
  406. auto ret = builder.BuildTask(*atomic_task, model_params_);
  407. if (ret != SUCCESS) {
  408. GELOGE(ret, "[Build][AtomicAddrCleanOpTask]failed.");
  409. REPORT_INNER_ERROR("E19999", "[Build][AtomicAddrCleanOpTask]failed.");
  410. return ret;
  411. }
  412. *task = atomic_task.release();
  413. return SUCCESS;
  414. }
  415. Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id) {
  416. auto iter = op_list_.find(kernel_def.op_index());
  417. if (iter == op_list_.end()) {
  418. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
  419. "[Check][Param:KernelExDef]op not found. op index = %u", kernel_def.op_index());
  420. REPORT_INNER_ERROR("E19999",
  421. "BuildKernelExTask fail for param kernel_def, because op of kernel_def not found, op index:%u.",
  422. kernel_def.op_index());
  423. return ACL_ERROR_GE_INTERNAL_ERROR;
  424. }
  425. std::unique_ptr<AiCpuTask> aicpu_task(new (std::nothrow) AiCpuTask());
  426. if (aicpu_task == nullptr) {
  427. GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuTask] failed.");
  428. REPORT_INNER_ERROR("E19999", "BuildKernelExTask fail for new AiCpuTask, model_name:%s.", model_name_.c_str());
  429. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  430. }
  431. auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  432. auto ret = builder.BuildTask(*aicpu_task, model_params_, kernel_id);
  433. if (ret != SUCCESS) {
  434. GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id);
  435. return ret;
  436. }
  437. *task = aicpu_task.release();
  438. return SUCCESS;
  439. }
  440. Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) {
  441. const auto &context = kernel_def.context();
  442. auto iter = op_list_.find(context.op_index());
  443. if (iter == op_list_.end()) {
  444. GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
  445. "[Check][Param:KernelDef] op desc not found. op index = %u", context.op_index());
  446. REPORT_INNER_ERROR("E19999",
  447. "BuildCpuKernelTask fail for kernel_def is invalid, because op of kernel_def not found, op index:%u.",
  448. context.op_index());
  449. return ACL_ERROR_GE_INTERNAL_ERROR;
  450. }
  451. std::unique_ptr<AiCpuCCTask> aicpucc_task(new (std::nothrow) AiCpuCCTask());
  452. if (aicpucc_task == nullptr) {
  453. GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[Create][AiCpuCCTask] failed");
  454. REPORT_INNER_ERROR("E19999", "BuildCpuKernelTask fail for new AiCpuCCTask, model_name:%s.", model_name_.c_str());
  455. return ACL_ERROR_GE_MEMORY_ALLOCATION;
  456. }
  457. auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def);
  458. auto ret = builder.BuildTask(*aicpucc_task, kernel_id, model_params_);
  459. if (ret != SUCCESS) {
  460. GELOGE(ret, "[Build][AiCpuCCTask]failed, kernel_id:%lu.", kernel_id);
  461. REPORT_CALL_ERROR("E19999", "BuildCpuKernelTask fail for build AiCpuTask, kernel_id:%lu.", kernel_id);
  462. return ret;
  463. }
  464. *task = aicpucc_task.release();
  465. return SUCCESS;
  466. }
// Prepares a HybridModelExecutor for a single op that must run through the
// hybrid engine. Copies each Data op's output desc as an input desc of the
// op, wires |ge_model| into the root model as its (sole) subgraph model, then
// builds and initializes the hybrid model and an executor bound to the
// current device and the resource's stream.
Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model,
                                              SingleOp &single_op) {
  for (const auto &op_desc : data_ops_) {
    auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData);
    GeTensorDesc tensor_desc(output_tensor_desc);
    single_op.inputs_desc_.emplace_back(tensor_desc);
    GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str());
  }
  GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
  auto root_model = model_helper_.GetGeRootModel();
  GE_CHECK_NOTNULL(root_model);
  root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
  root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
  single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
  GE_CHECK_NOTNULL(single_op.hybrid_model_);
  // Init(true): presumably flags single-op mode — confirm against
  // HybridModel::Init's parameter documentation.
  GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
  int32_t device_id = 0;
  GE_CHK_RT_RET(rtGetDevice(&device_id));
  single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
                                                                                      device_id,
                                                                                      resource.GetStream()));
  GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
  GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
  return SUCCESS;
}
// Builds an executable static-shape SingleOp: parses graph inputs/outputs,
// allocates model memory, snapshots the run parameters, then either hands off
// to the hybrid executor (when any op declares inference depends) or builds
// the regular task list.
Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
  GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());
  GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_));
  GE_CHECK_NOTNULL(single_op.running_param_);
  GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));
  auto ge_model = model_helper_.GetGeModel();
  GE_CHECK_NOTNULL(ge_model);
  bool infer_depend_flag = false;
  bool is_host_mem = false;
  // NOTE(review): is_host_mem is computed but not consulted here (unlike
  // NeedHybridModel) — the hybrid path is taken whenever infer depends exist;
  // confirm this asymmetry is intentional.
  GE_CHK_STATUS_RET(CheckInferDepend(ge_model, infer_depend_flag, is_host_mem), "[Check][InferDepend] failed.");
  if (infer_depend_flag) {
    // construct single_op, do single op with HybridModelExecutor
    GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor.");
    return InitHybridModelExecutor(resource, ge_model, single_op);
  }
  return BuildTaskList(&resource, single_op);
}
  510. Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) {
  511. auto ge_model = model_helper_.GetGeModel();
  512. GE_CHECK_NOTNULL(ge_model);
  513. auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  514. GE_CHECK_NOTNULL(compute_graph);
  515. single_op.compute_graph_ = compute_graph;
  516. if (node_tasks_.size() != 1) {
  517. GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
  518. REPORT_INNER_ERROR("E19999", "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
  519. return ACL_ERROR_GE_PARAM_INVALID;
  520. }
  521. auto iter = node_tasks_.begin();
  522. auto node = iter->first;
  523. const auto &task_defs = iter->second;
  524. if (task_defs.size() <= 0 || task_defs.size() > kNumTaskWithAtomicAddrCleanTask) {
  525. GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node size must be 1, but get %zu.", node_tasks_.size());
  526. REPORT_INNER_ERROR("E19999", "[Check][Size]task_defs size must be 1 or 2, but get %zu.", task_defs.size());
  527. return ACL_ERROR_GE_PARAM_INVALID;
  528. }
  529. GE_CHECK_NOTNULL(node);
  530. auto op_desc = node->GetOpDesc();
  531. GE_CHECK_NOTNULL(op_desc);
  532. const auto &lib_name = op_desc->GetOpKernelLibName();
  533. if (lib_name == kEngineNameAiCore) {
  534. GELOGD("Building TBE task.");
  535. const auto &task_def = task_defs.back();
  536. TbeOpTask *tbe_task = nullptr;
  537. GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
  538. tbe_task->SetModelArgs(model_name_, model_id_);
  539. if (tbe_task->tiling_buffer_ != nullptr) {
  540. GELOGD("tiling buffer is not nullptr.");
  541. tbe_task->stream_resource_ = stream_resource;
  542. }
  543. if (task_defs.size() == kNumTaskWithAtomicAddrCleanTask) {
  544. const auto &atomic_task_def = task_defs.front();
  545. AtomicAddrCleanOpTask *atomic_task = nullptr;
  546. GE_CHK_STATUS_RET_NOLOG(BuildAtomicTask(atomic_task_def, &atomic_task));
  547. GE_CHK_STATUS_RET_NOLOG(atomic_task->InitAtomicAddrCleanIndices());
  548. tbe_task->SetAtomicAddrCleanTask(atomic_task);
  549. }
  550. single_op.op_task_.reset(tbe_task);
  551. } else if (lib_name == kEngineNameAiCpu) {
  552. const auto &task_def = task_defs[0];
  553. GELOGD("Building AICPU_CC task");
  554. OpTask *task = nullptr;
  555. uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
  556. GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
  557. GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
  558. task->SetModelArgs(model_name_, model_id_);
  559. single_op.op_task_.reset(task);
  560. } else if (lib_name == kEngineNameAiCpuTf) {
  561. const auto &task_def = task_defs[0];
  562. GELOGD("Building AICPU_TF task");
  563. AiCpuTask *aicpu_task = nullptr;
  564. uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
  565. GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id);
  566. GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id));
  567. if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) {
  568. if (task_defs.size() < kNumTaskWithMemCpyTask) {
  569. GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found.");
  570. REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found.");
  571. return ACL_ERROR_GE_PARAM_INVALID;
  572. }
  573. const TaskDef &copy_task_def = task_defs[1];
  574. GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
  575. }
  576. aicpu_task->SetModelArgs(model_name_, model_id_);
  577. single_op.op_task_.reset(aicpu_task);
  578. }
  579. return SUCCESS;
  580. }
  581. Status SingleOpModel::NeedHybridModel(GeModelPtr &ge_model, bool &need_hybrid_model) {
  582. bool is_infer_depend = false;
  583. bool is_host_mem = false;
  584. GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed.");
  585. bool need_d2h_cpy = is_infer_depend && !is_host_mem;
  586. need_hybrid_model = need_d2h_cpy || node_tasks_.size() > 1;
  587. return SUCCESS;
  588. }
  589. Status SingleOpModel::ParseTasks() {
  590. auto ge_model = model_helper_.GetGeModel();
  591. GE_CHECK_NOTNULL(ge_model);
  592. auto tasks = ge_model->GetModelTaskDefPtr()->task();
  593. for (int i = 0; i < tasks.size(); ++i) {
  594. TaskDef &task_def = tasks[i];
  595. GELOGI("[%s] Task[%d], type = [%u], DebugString = [%s]", model_name_.c_str(), i, task_def.type(),
  596. task_def.DebugString().c_str());
  597. auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
  598. uint32_t op_index = 0;
  599. if (task_type == RT_MODEL_TASK_KERNEL) {
  600. op_index = task_def.kernel().context().op_index();
  601. } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
  602. op_index = task_def.kernel_ex().op_index();
  603. } else if (task_type == RT_MODEL_TASK_ALL_KERNEL) {
  604. op_index = task_def.kernel_with_handle().context().op_index();
  605. } else {
  606. GELOGD("Skip task type: %d", static_cast<int>(task_type));
  607. continue;
  608. }
  609. GELOGD("op_index = %u, task_type = %d", op_index, task_type);
  610. auto iter = op_list_.find(op_index);
  611. if (iter == op_list_.end()) {
  612. GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index);
  613. REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u.", op_index);
  614. return INTERNAL_ERROR;
  615. }
  616. auto &node = iter->second;
  617. node_tasks_[node].emplace_back(task_def);
  618. }
  619. return SUCCESS;
  620. }
  621. Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &single_op) {
  622. single_op.num_inputs_ = data_ops_.size();
  623. single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
  624. GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
  625. model_params_.memory_size = UINT64_MAX;
  626. model_params_.graph_is_dynamic = true;
  627. GE_CHK_STATUS_RET(ParseTasks(), "[Parse][Tasks] failed.");
  628. auto ge_model = model_helper_.GetGeModel();
  629. GE_CHECK_NOTNULL(ge_model);
  630. bool need_hybrid_model = false;
  631. GE_CHK_STATUS_RET(NeedHybridModel(ge_model, need_hybrid_model), "[Check][NeedHybridModel] failed.");
  632. if (need_hybrid_model) {
  633. GELOGD("Build single op HybridModel.");
  634. GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
  635. GE_CHK_STATUS(SetHostMemTensor(single_op), "[Init][HostMem]Failed.");
  636. auto root_model = model_helper_.GetGeRootModel();
  637. GE_CHECK_NOTNULL(root_model);
  638. root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
  639. root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
  640. single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
  641. GE_CHECK_NOTNULL(single_op.hybrid_model_);
  642. GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
  643. int32_t device_id = 0;
  644. GE_CHK_RT_RET(rtGetDevice(&device_id));
  645. single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
  646. device_id,
  647. resource.GetStream()));
  648. GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
  649. ThreadPool *thread_pool = nullptr;
  650. GE_CHK_STATUS_RET_NOLOG(resource.GetThreadPool(&thread_pool));
  651. GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(thread_pool), "[Init][HybridModelExecutor]Failed.");
  652. return SUCCESS;
  653. }
  654. return BuildTaskListForDynamicOp(&resource, single_op);
  655. }
  656. Status SingleOpModel::SetHostMemTensor(DynamicSingleOp &single_op) {
  657. for (auto &node_map : op_with_hostmem_) {
  658. auto node = node_map.second;
  659. auto out_anchor = node->GetOutDataAnchor(0);
  660. GE_CHECK_NOTNULL(out_anchor);
  661. auto in_anchors = out_anchor->GetPeerInDataAnchors();
  662. vector<GeTensorDescPtr> tensor_descs;
  663. auto idx = node_map.first;
  664. for (auto anchor : in_anchors) {
  665. GE_CHECK_NOTNULL(anchor);
  666. auto output_node = anchor->GetOwnerNode();
  667. GE_CHECK_NOTNULL(output_node);
  668. auto op_desc = output_node->GetOpDesc();
  669. GE_CHECK_NOTNULL(op_desc);
  670. auto tensor_desc = op_desc->MutableInputDesc(anchor->GetIdx());
  671. tensor_descs.emplace_back(tensor_desc);
  672. GELOGD("Get %d th input tensor desc of %s by %d data node: %s.", anchor->GetIdx(),
  673. output_node->GetName().c_str(), idx, node->GetName().c_str());
  674. }
  675. single_op.tensor_with_hostmem_[idx] = tensor_descs;
  676. }
  677. return SUCCESS;
  678. }
  679. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示