You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

task_context.cc 23 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "hybrid/node_executor/task_context.h"
  17. #include "framework/common/ge_inner_error_codes.h"
  18. #include "framework/common/debug/log.h"
  19. #include "graph/utils/tensor_utils.h"
  20. #include "external/graph/types.h"
  21. #include "graph/debug/ge_attr_define.h"
  22. #include "hybrid/executor/hybrid_execution_context.h"
  23. #include "hybrid/executor/subgraph_executor.h"
  24. #include "common/profiling/profiling_manager.h"
  25. namespace ge {
  26. namespace hybrid {
  27. TaskContext::TaskContext(GraphExecutionContext *execution_context,
  28. NodeState *node_state,
  29. SubgraphContext *subgraph_context)
  30. : node_state_(node_state),
  31. node_item_(node_state->GetNodeItem()),
  32. execution_context_(execution_context),
  33. subgraph_context_(subgraph_context) {}
  34. TaskContext::~TaskContext() {
  35. GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str());
  36. // release output
  37. for (int i = 0; i < NumOutputs(); ++i) {
  38. auto output_tensor = MutableOutput(i);
  39. if (output_tensor != nullptr) {
  40. output_tensor->Destroy();
  41. }
  42. }
  43. ReleaseWorkspace();
  44. }
  45. void TaskContext::ReleaseWorkspace() {
  46. GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str());
  47. for (auto ws_addr : workspaces_) {
  48. execution_context_->allocator->Deallocate(ws_addr);
  49. }
  50. workspaces_.clear();
  51. }
  52. std::unique_ptr<TaskContext> TaskContext::Create(NodeState *node_state, SubgraphContext *subgraph_context) {
  53. const NodeItem &node_item = *node_state->GetNodeItem();
  54. GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.",
  55. node_item.NodeName().c_str(),
  56. node_item.input_start,
  57. node_item.num_inputs,
  58. node_item.output_start,
  59. node_item.num_outputs);
  60. if (node_item.input_start < 0 || node_item.output_start < 0) {
  61. REPORT_INNER_ERROR("E19999", "NodeItem:%s(%s) not property initialized."
  62. "input_start:%d or output_start:%d less than 0",
  63. node_item.NodeName().c_str(), node_item.NodeType().c_str(),
  64. node_item.input_start, node_item.output_start);
  65. GELOGE(INTERNAL_ERROR,
  66. "[Check][Param]NodeItem:%s(%s) not property initialized. input_start = %d, output_start = %d",
  67. node_item.NodeName().c_str(), node_item.NodeType().c_str(),
  68. node_item.input_start, node_item.output_start);
  69. return nullptr;
  70. }
  71. auto task_context = std::unique_ptr<TaskContext>(
  72. new(std::nothrow)TaskContext(subgraph_context->execution_context_, node_state, subgraph_context));
  73. if (task_context == nullptr) {
  74. REPORT_CALL_ERROR("E19999", "Create TaskContext failed for [%s].", node_item.NodeName().c_str());
  75. GELOGE(MEMALLOC_FAILED, "[Create][TaskContext] failed for [%s].", node_item.NodeName().c_str());
  76. return nullptr;
  77. }
  78. task_context->node_item_ = &node_item;
  79. task_context->inputs_start_ = subgraph_context->all_inputs_.data() + node_item.input_start;
  80. task_context->outputs_start_ = subgraph_context->all_outputs_.data() + node_item.output_start;
  81. task_context->iteration_ = subgraph_context->execution_context_->iteration;
  82. return task_context;
  83. }
  84. int TaskContext::NumInputs() const {
  85. return node_item_->num_inputs;
  86. }
  87. int TaskContext::NumOutputs() const {
  88. return node_item_->num_outputs;
  89. }
  90. TensorValue *TaskContext::MutableInput(int index) {
  91. if (index < 0 || index >= node_item_->num_inputs) {
  92. REPORT_INNER_ERROR("E19999", "Index out of range, check invalid. index = %d, num_inputs = %d, node:%s(%s)",
  93. index, node_item_->num_inputs,
  94. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  95. GELOGE(PARAM_INVALID, "[Check][Param]Index out of range. index = %d, num_inputs = %d, node:%s(%s)",
  96. index, node_item_->num_inputs,
  97. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  98. return nullptr;
  99. }
  100. return inputs_start_ + index;
  101. }
  102. const TensorValue *TaskContext::GetOutput(int index) const {
  103. if (index < 0 || index >= node_item_->num_outputs) {
  104. REPORT_INNER_ERROR("E19999", "Index out of range, check invalid. index = %d, num_outputs = %d, node:%s(%s)",
  105. index, node_item_->num_outputs,
  106. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  107. GELOGE(PARAM_INVALID, "[Check][Param]Index out of range. index = %d, num_outputs = %d, node:%s(%s)",
  108. index, node_item_->num_outputs,
  109. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  110. return nullptr;
  111. }
  112. return outputs_start_ + index;
  113. }
  114. TensorValue *TaskContext::MutableOutput(int index) {
  115. if (index < 0 || index >= node_item_->num_outputs) {
  116. REPORT_INNER_ERROR("E19999", "Index out of range, check invalid. index = %d, num_outputs = %d, node:%s(%s)",
  117. index, node_item_->num_outputs,
  118. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  119. GELOGE(PARAM_INVALID, "[Check][Param]Index out of range. index = %d, num_outputs = %d, node:%s(%s)",
  120. index, node_item_->num_outputs,
  121. node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  122. return nullptr;
  123. }
  124. return outputs_start_ + index;
  125. }
  126. std::size_t TaskContext::NumWorkspaces() const {
  127. return workspaces_.size();
  128. }
  129. void *TaskContext::MutableWorkspace(int index) {
  130. if (index < 0 || static_cast<size_t>(index) >= workspaces_.size()) {
  131. REPORT_INNER_ERROR("E19999", "Index:%d out of range, check invalid. number:%zu of workspaces_, node:%s(%s)",
  132. index, workspaces_.size(), node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  133. GELOGE(PARAM_INVALID, "[Check][Param]Index:%d out of range. number:%zu of workspaces_, node:%s(%s)",
  134. index, workspaces_.size(), node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  135. return nullptr;
  136. }
  137. return workspaces_[index];
  138. }
  139. const TensorValue *TaskContext::GetInput(int index) const {
  140. if (index < 0 || index >= node_item_->num_inputs) {
  141. REPORT_INNER_ERROR("E19999", "Index:%d out of range, check invalid. num_inputs:%d node:%s(%s)",
  142. index, node_item_->num_inputs, node_item_->NodeName().c_str(),
  143. node_item_->NodeType().c_str());
  144. GELOGE(PARAM_INVALID, "[Check][Param]Index:%d out of range. num_inputs:%d node:%s(%s)",
  145. index, node_item_->num_inputs, node_item_->NodeName().c_str(), node_item_->NodeType().c_str());
  146. return nullptr;
  147. }
  148. return inputs_start_ + index;
  149. }
  150. Status TaskContext::AllocateWorkspaces() {
  151. auto workspace_sizes = node_item_->node->GetOpDesc()->GetWorkspaceBytes();
  152. for (auto size : workspace_sizes) {
  153. void *workspace = execution_context_->allocator->Allocate(size);
  154. if (workspace == nullptr) {
  155. REPORT_CALL_ERROR("E19999", "node:%s(%s) Allocate workspace failed, size: %ld",
  156. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(), size);
  157. GELOGE(MEMALLOC_FAILED, "[Allocate][workspace] failed for node:%s(%s), size: %ld",
  158. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(), size);
  159. return MEMALLOC_FAILED;
  160. }
  161. workspaces_.emplace_back(workspace);
  162. }
  163. return SUCCESS;
  164. }
  165. Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
  166. if (callback_fun == nullptr) {
  167. GELOGW("[%s] Callback is NULL", GetNodeName());
  168. return SUCCESS;
  169. }
  170. auto ret = execution_context_->callback_manager->RegisterCallback(GetStream(), callback_fun);
  171. if (ret != SUCCESS) {
  172. REPORT_CALL_ERROR("E19999", "RegisterCallback failed for [%s]", GetNodeName());
  173. GELOGE(ret, "[Register][Callback] failed for [%s]", GetNodeName());
  174. execution_context_->callback_manager->Destroy();
  175. return ret;
  176. }
  177. return SUCCESS;
  178. }
  179. string TaskContext::TensorDesc2String(const GeTensorDesc &desc) {
  180. std::stringstream ss;
  181. ss << "[TensorDesc] ";
  182. ss << "DataType = " << desc.GetDataType();
  183. ss << ", Format = " << desc.GetFormat();
  184. ss << ", Shape = [";
  185. for (auto dim : desc.GetShape().GetDims()) {
  186. ss << dim << ", ";
  187. }
  188. ss << "]";
  189. return ss.str();
  190. }
  191. Status TaskContext::AllocateTensor(const GeTensorDesc &tensor_desc, TensorValue &tensor, AllocationAttr *attr) {
  192. int64_t size = 0;
  193. if (ge::TensorUtils::GetSize(tensor_desc, size) != GRAPH_SUCCESS) {
  194. REPORT_CALL_ERROR("E19999", "Get TensorSize failed, tensor:%s", tensor_desc.GetName().c_str());
  195. GELOGE(INTERNAL_ERROR, "[Get][TensorSize] failed, tensor:%s", tensor_desc.GetName().c_str());
  196. return INTERNAL_ERROR;
  197. }
  198. if (size == 0) {
  199. GELOGW("size from tensor_desc == 0");
  200. }
  201. auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  202. GE_CHECK_NOTNULL(buffer);
  203. tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  204. return SUCCESS;
  205. }
  206. Status TaskContext::AllocateOutput(int index,
  207. const GeTensorDesc &tensor_desc,
  208. TensorValue **tensor,
  209. AllocationAttr *attr) {
  210. GELOGI("To allocate output for node: %s. index = %d, tensor desc = %s",
  211. node_item_->NodeName().c_str(),
  212. index,
  213. TensorDesc2String(tensor_desc).c_str());
  214. if (index < 0 || index >= node_item_->num_outputs) {
  215. REPORT_INNER_ERROR("E19999", "%s(%s) output index out of range check invalid. num_output = %d, index = %d",
  216. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(),
  217. node_item_->num_outputs, index);
  218. GELOGE(PARAM_INVALID, "[Check][Param] %s(%s) output index out of range. num_output = %d, index = %d",
  219. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(),
  220. node_item_->num_outputs, index);
  221. return PARAM_INVALID;
  222. }
  223. if (outputs_start_[index].GetData() != nullptr) {
  224. GELOGI("already allocated as net output");
  225. return SUCCESS;
  226. }
  227. int32_t calc_type = 0;
  228. bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
  229. if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
  230. outputs_start_[index] = TensorValue();
  231. return SUCCESS;
  232. }
  233. auto it = node_item_->ref_outputs.find(index);
  234. if (it != node_item_->ref_outputs.end()) {
  235. auto &ref_node = it->second;
  236. GELOGD("source node of %s:%d = %s, op_type = %s",
  237. node_item_->NodeName().c_str(),
  238. index,
  239. ref_node->GetName().c_str(),
  240. ref_node->GetType().c_str());
  241. TensorValue *ref_tensor = execution_context_->model->GetTensor(ref_node);
  242. GE_CHECK_NOTNULL(ref_tensor);
  243. outputs_start_[index] = *ref_tensor;
  244. } else {
  245. auto reuse_output_it = node_item_->reuse_outputs.find(index);
  246. if (reuse_output_it != node_item_->reuse_outputs.end()) {
  247. GELOGD("[%s] reuse output [%d] with output [%d]", GetNodeName(), index, reuse_output_it->second);
  248. outputs_start_[index] = outputs_start_[reuse_output_it->second];
  249. } else {
  250. auto reuse_input = node_item_->reuse_inputs.find(index);
  251. if (reuse_input != node_item_->reuse_inputs.end()) {
  252. GELOGD("[%s] Output[%d] is referenced to input[%d]", GetNodeName(), index, reuse_input->second);
  253. outputs_start_[index] = inputs_start_[reuse_input->second];
  254. } else {
  255. GE_CHK_STATUS_RET_NOLOG(AllocateTensor(tensor_desc, outputs_start_[index], attr));
  256. GELOGD("Allocating output successfully. node: %s. index = %d, size = %zu",
  257. node_item_->NodeName().c_str(), index, outputs_start_[index].GetSize());
  258. }
  259. }
  260. }
  261. if (execution_context_->trace_enabled) {
  262. outputs_start_[index].SetName(node_item_->NodeName() + "_out_" + std::to_string(index));
  263. }
  264. if (tensor != nullptr) {
  265. *tensor = outputs_start_ + index;
  266. }
  267. return SUCCESS;
  268. }
  269. Status TaskContext::AllocateOutputs(AllocationAttr *attr) {
  270. for (int i = 0; i < node_item_->num_outputs; ++i) {
  271. const auto &output_desc = node_item_->MutableOutputDesc(i);
  272. GE_CHECK_NOTNULL(output_desc);
  273. uint32_t mem_type = 0;
  274. (void)AttrUtils::GetInt(output_desc, ATTR_OUTPUT_MEMORY_TYPE, mem_type);
  275. if (attr == nullptr) {
  276. auto tmp_attr = AllocationAttr(0, nullptr, static_cast<MemStorageType>(mem_type));
  277. GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, &tmp_attr));
  278. } else {
  279. attr->SetMemType(static_cast<MemStorageType>(mem_type));
  280. GE_CHK_STATUS_RET_NOLOG(AllocateOutput(i, *output_desc, nullptr, attr));
  281. }
  282. }
  283. return SUCCESS;
  284. }
  285. Status TaskContext::AllocateTensor(size_t size, TensorValue &tensor, AllocationAttr *attr) {
  286. auto buffer = TensorBuffer::Create(execution_context_->allocator, size, attr);
  287. if (buffer == nullptr) {
  288. REPORT_CALL_ERROR("E19999", "%s(%s) Allocate buffer failed, size: %zu",
  289. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(), size);
  290. GELOGE(MEMALLOC_FAILED, "[Allocate][buffer] failed for %s(%s), size: %zu",
  291. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(), size);
  292. return MEMALLOC_FAILED;
  293. }
  294. tensor = TensorValue(shared_ptr<TensorBuffer>(buffer.release()));
  295. return SUCCESS;
  296. }
  297. const NodeItem &TaskContext::GetNodeItem() const {
  298. return *node_item_;
  299. }
  300. Status TaskContext::SetOutput(int index, const TensorValue &tensor) {
  301. if (index < 0 || index >= node_item_->num_outputs) {
  302. REPORT_INNER_ERROR("E19999", "%s(%s) output index out of range check invalid. num_output = %d, index = %d",
  303. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(),
  304. node_item_->num_outputs, index);
  305. GELOGE(PARAM_INVALID, "[Check][Param]%s(%s) output index out of range. num_output = %d, index = %d",
  306. node_item_->NodeName().c_str(), node_item_->NodeType().c_str(),
  307. node_item_->num_outputs, index);
  308. return PARAM_INVALID;
  309. }
  310. GELOGD("Set %s:%d with tensor: %s",
  311. node_item_->NodeName().c_str(),
  312. index,
  313. tensor.DebugString().c_str());
  314. outputs_start_[index] = tensor;
  315. return SUCCESS;
  316. }
  317. rtStream_t TaskContext::GetStream() const {
  318. return execution_context_->stream;
  319. }
  320. int64_t TaskContext::GetSessionId() const {
  321. return execution_context_->session_id;
  322. }
  323. Status TaskContext::GetStatus() const {
  324. return status_;
  325. }
  326. void TaskContext::SetStatus(Status status) {
  327. status_ = status;
  328. if (status != SUCCESS) {
  329. execution_context_->SetErrorCode(status);
  330. }
  331. }
  332. uint32_t TaskContext::GetTaskId() const {
  333. return task_id_;
  334. }
  335. void TaskContext::SetTaskId(uint32_t task_id) {
  336. task_id_ = task_id;
  337. }
  338. uint32_t TaskContext::GetStreamId() const {
  339. return stream_id_;
  340. }
  341. void TaskContext::SetStreamId(uint32_t stream_id) {
  342. stream_id_ = stream_id;
  343. }
  344. void TaskContext::SetOverFlow(bool is_over_flow) {
  345. is_over_flow_ = is_over_flow;
  346. }
  347. bool TaskContext::IsOverFlow() {
  348. return is_over_flow_;
  349. }
  350. Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
  351. GE_CHECK_NOTNULL(buffer);
  352. if (ori_addr == nullptr) {
  353. *buffer = execution_context_->allocator->Allocate(size, nullptr);
  354. } else {
  355. AllocationAttr attr(ori_addr);
  356. *buffer = execution_context_->allocator->Allocate(size, &attr);
  357. }
  358. if (*buffer == nullptr) {
  359. REPORT_CALL_ERROR("E19999", "Allocate Workspace failed, size = %zu", size);
  360. GELOGE(MEMALLOC_FAILED, "[Allocate][Workspace] failed, size = %zu", size);
  361. return MEMALLOC_FAILED;
  362. }
  363. GELOGD("[%s] Allocating workspace of size = %zu successfully", node_item_->NodeName().c_str(), size);
  364. workspaces_.emplace_back(*buffer);
  365. return SUCCESS;
  366. }
  367. Status TaskContext::PropagateOutputs() {
  368. // propagate outputs
  369. const auto &guard = node_item_->MutexGuard("PropagateOutputs");
  370. for (int i = 0; i < NumOutputs(); ++i) {
  371. auto tensor = MutableOutput(i);
  372. GE_CHECK_NOTNULL(tensor);
  373. if (tensor->GetData() == nullptr) {
  374. GELOGD("[%s] Node output[%d] is null.", node_item_->NodeName().c_str(), i);
  375. }
  376. auto &output_nodes = node_item_->outputs[i];
  377. for (auto &dst_input_index_and_node : output_nodes) {
  378. auto dst_input_idx = dst_input_index_and_node.first;
  379. auto dst_node_item = dst_input_index_and_node.second;
  380. auto input_offset = dst_node_item->input_start + dst_input_idx;
  381. GELOGD(
  382. "Propagate output of node %s, output index = %d, dst node = %s, "
  383. "dst_input_index = %d, dst_input_offset = %d.",
  384. node_item_->NodeName().c_str(),
  385. i,
  386. dst_node_item->NodeName().c_str(),
  387. dst_input_idx,
  388. input_offset);
  389. if (subgraph_context_->all_inputs_.size() <= static_cast<size_t>(input_offset)) {
  390. REPORT_INNER_ERROR("E19999",
  391. "[%s] input index out of range check invalid. index = %d, total input num = %zu",
  392. GetNodeName(), input_offset, subgraph_context_->all_inputs_.size());
  393. GELOGE(INTERNAL_ERROR, "[Check][Size][%s] input index out of range. index = %d, total input num = %zu",
  394. GetNodeName(), input_offset, subgraph_context_->all_inputs_.size());
  395. return INTERNAL_ERROR;
  396. }
  397. subgraph_context_->all_inputs_[input_offset] = *tensor;
  398. if (execution_context_->trace_enabled) {
  399. subgraph_context_->all_inputs_[input_offset].SetName(
  400. node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx));
  401. }
  402. }
  403. }
  404. (void)guard;
  405. return SUCCESS;
  406. }
  407. const void *TaskContext::GetVarBaseAddr() {
  408. return execution_context_->model->GetVarMemBase();
  409. }
  410. const char *TaskContext::GetNodeName() const {
  411. return node_item_->NodeName().c_str();
  412. }
  413. void TaskContext::ReleaseInputsAndOutputs() {
  414. for (int i = 0; i < node_item_->num_inputs; ++i) {
  415. auto tensor = inputs_start_ + i;
  416. tensor->Destroy();
  417. GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
  418. }
  419. for (int i = 0; i < node_item_->num_outputs; ++i) {
  420. auto tensor = outputs_start_ + i;
  421. tensor->Destroy();
  422. GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
  423. }
  424. }
  425. void TaskContext::ReleaseInput(int index) {
  426. auto input_tensor = MutableInput(index);
  427. if (input_tensor != nullptr) {
  428. node_state_->SavePersistTensor(index, *input_tensor);
  429. input_tensor->Destroy();
  430. GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), index);
  431. }
  432. }
  433. ConstGeTensorDescPtr TaskContext::GetOutputDesc(int index) const {
  434. return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
  435. }
  436. ConstGeTensorDescPtr TaskContext::GetInputDesc(int index) const {
  437. return node_item_->MutableInputDesc(index);
  438. }
  439. GeTensorDescPtr TaskContext::MutableInputDesc(int index) const {
  440. return node_item_->MutableInputDesc(index);
  441. }
  442. GeTensorDescPtr TaskContext::MutableOutputDesc(int index) const {
  443. return node_item_->MutableOutputDesc(static_cast<uint32_t>(index));
  444. }
  445. bool TaskContext::IsForceInferShape() const {
  446. return force_infer_shape_;
  447. }
  448. void TaskContext::SetForceInferShape(bool force_infer_shape) {
  449. force_infer_shape_ = force_infer_shape;
  450. }
  451. void TaskContext::NodeDone() {
  452. subgraph_context_->NodeDone(node_item_->node);
  453. }
  454. void TaskContext::OnError(Status error) {
  455. subgraph_context_->OnError(error);
  456. execution_context_->SetErrorCode(error);
  457. }
  458. bool TaskContext::IsTraceEnabled() const {
  459. return execution_context_->trace_enabled;
  460. }
  461. TensorValue *TaskContext::GetVariable(const std::string &name) {
  462. return execution_context_->model->GetVariable(name);
  463. }
  464. uint64_t TaskContext::GetIterationNumber() const {
  465. return iteration_;
  466. }
  467. bool TaskContext::IsDumpEnabled() const {
  468. return execution_context_->dump_enabled;
  469. }
  470. Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) const {
  471. if (!callback_fun) {
  472. return SUCCESS;
  473. }
  474. if (node_item_->has_observer) {
  475. return RegisterCallback(callback_fun);
  476. }
  477. callback_fun();
  478. return SUCCESS;
  479. }
  480. const DumpProperties &TaskContext::GetDumpProperties() const {
  481. return execution_context_->dump_properties;
  482. }
  483. bool TaskContext::NeedCallback() {
  484. return node_item_->has_observer || IsDumpEnabled() || GraphExecutionContext::profiling_level > 0 ||
  485. !execution_context_->model->IsSingleOp() || ProfilingManager::Instance().ProfilingModelLoadOn();
  486. }
  487. Status TaskContext::Synchronize() {
  488. return execution_context_->Synchronize(GetStream());
  489. }
  490. Status TaskContext::SaveProfilingTaskDescInfo(uint32_t task_id, uint32_t stream_id, const std::string &task_type,
  491. uint32_t block_dim, const std::string &op_type) {
  492. if (ProfilingManager::Instance().ProfilingModelLoadOn()) {
  493. const NodeItem &node_item = GetNodeItem();
  494. auto op_desc = node_item.GetOpDesc();
  495. GE_CHECK_NOTNULL(op_desc);
  496. const GraphExecutionContext *graph_context = GetExecutionContext();
  497. GE_CHECK_NOTNULL(graph_context);
  498. const HybridModel *model = graph_context->model;
  499. GE_CHECK_NOTNULL(model);
  500. std::string dynamic_model_name = model->GetModelName();
  501. TaskDescInfo tmp_task_desc_info;
  502. tmp_task_desc_info.model_name = dynamic_model_name;
  503. tmp_task_desc_info.op_name = op_desc->GetName();
  504. tmp_task_desc_info.op_type = op_type;
  505. tmp_task_desc_info.block_dim = block_dim;
  506. tmp_task_desc_info.task_type = task_type;
  507. tmp_task_desc_info.task_id = task_id;
  508. tmp_task_desc_info.stream_id = stream_id;
  509. tmp_task_desc_info.shape_type = "dynamic";
  510. tmp_task_desc_info.cur_iter_num = iteration_ + 1;
  511. task_desc_info.emplace_back(tmp_task_desc_info);
  512. }
  513. return SUCCESS;
  514. }
  515. NodeState *TaskContext::GetNodeState() const {
  516. return node_state_;
  517. }
  518. Status TaskContext::GetInputDesc(int index, GeTensorDesc &tensor_desc) const {
  519. return node_item_->GetInputDesc(index, tensor_desc);
  520. }
  521. Status TaskContext::UpdateInputDesc(int index, const GeTensorDesc &tensor_desc) {
  522. return const_cast<NodeItem *>(node_item_)->UpdateInputDesc(index, tensor_desc);
  523. }
  524. Status TaskContext::GetOutputDesc(int index, GeTensorDesc &tensor_desc) const {
  525. return node_item_->GetOutputDesc(index, tensor_desc);
  526. }
  527. } // namespace hybrid
  528. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示