
subgraph_executor.cc 31 kB

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "hybrid/executor/subgraph_executor.h"
#include "graph/ge_context.h"
#include "hybrid/executor/worker/task_compile_engine.h"
#include "hybrid/executor/worker/execution_engine.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace hybrid {
namespace {
constexpr int kDefaultThreadNum = 4;
constexpr int kDefaultQueueSize = 16;
constexpr int kDataInputIndex = 0;
}  // namespace

SubgraphExecutor::SubgraphExecutor(const GraphItem *graph_item, GraphExecutionContext *context, bool force_infer_shape,
                                   ThreadPool *pre_run_pool)
    : graph_item_(graph_item),
      context_(context),
      force_infer_shape_(force_infer_shape),
      pre_run_pool_(pre_run_pool),
      own_thread_pool_(false),
      ready_queue_(kDefaultQueueSize) {
}

SubgraphExecutor::~SubgraphExecutor() {
  if (own_thread_pool_ && pre_run_pool_ != nullptr) {
    delete pre_run_pool_;
  }
  GELOGD("[%s] SubgraphExecutor destroyed.", graph_item_->GetName().c_str());
}
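
// Initializes the executor: lazily creates the pre-run thread pool if none was
// injected, builds the SubgraphContext and ShapeInferenceEngine, then binds the
// given inputs according to whether the subgraph has dynamic shapes.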
Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                              const std::vector<ConstGeTensorDescPtr> &input_desc) {
  if (pre_run_pool_ == nullptr) {
    pre_run_pool_ = new (std::nothrow) ThreadPool(kDefaultThreadNum);
    GE_CHECK_NOTNULL(pre_run_pool_);
    own_thread_pool_ = true;
  }
  subgraph_context_.reset(new (std::nothrow) SubgraphContext(graph_item_, context_));
  GE_CHECK_NOTNULL(subgraph_context_);
  GE_CHK_STATUS_RET(subgraph_context_->Init(),
                    "[Init][SubgraphContext][%s] Failed to init subgraph context.", graph_item_->GetName().c_str());

  shape_inference_engine_.reset(new (std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get()));
  GE_CHECK_NOTNULL(shape_inference_engine_);

  if (graph_item_->IsDynamic()) {
    GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc),
                      "[%s] Failed to set inputs.",
                      graph_item_->GetName().c_str());
  } else {
    GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
                      "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph.",
                      graph_item_->GetName().c_str());
  }

  return SUCCESS;
}
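
// Binds input tensors for a dynamic-shape subgraph. When shape inference is
// forced or the data node itself is dynamic, the data node's output desc is
// seeded with the shape coming from the parent so that downstream shape
// inference can proceed, and inference for the data node itself is skipped.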
Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue> &inputs,
                                                   const std::vector<ConstGeTensorDescPtr> &input_desc) {
  // The number of inputs of the parent node should be greater than or equal to that of the subgraph
  auto input_nodes = graph_item_->GetInputNodes();
  if (inputs.size() < input_nodes.size()) {
    GELOGE(INTERNAL_ERROR,
           "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
           graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
    REPORT_INNER_ERROR("E19999",
                       "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
                       graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
    return INTERNAL_ERROR;
  }

  for (size_t i = 0; i < input_nodes.size(); ++i) {
    auto &input_node = input_nodes[i];
    if (input_node == nullptr) {
      GELOGD("[%s] Input[%zu] is not needed by subgraph, skip it.", graph_item_->GetName().c_str(), i);
      continue;
    }

    auto &input_tensor = inputs[i];
    GELOGD("[%s] Set input tensor[%zu] to inputs with index = %d, tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           input_node->input_start,
           input_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
                      "[Invoke][SetInput] failed for graph_item[%s] input tensor[%zu]",
                      graph_item_->GetName().c_str(), i);

    if (force_infer_shape_ || input_node->is_dynamic) {
      GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
      GE_CHECK_LE(i + 1, input_desc.size());
      const auto &tensor_desc = input_desc[i];
      GE_CHECK_NOTNULL(tensor_desc);
      auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
      GE_CHECK_NOTNULL(node_state);
      node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
      auto op_desc = input_node->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      auto output_desc = op_desc->MutableOutputDesc(kDataInputIndex);
      GE_CHECK_NOTNULL(output_desc);
      output_desc->SetShape(tensor_desc->GetShape());
      output_desc->SetOriginShape(tensor_desc->GetOriginShape());
      node_state->SetSkipInferShape(true);
    }
  }

  GELOGD("[%s] Done setting inputs.", graph_item_->GetName().c_str());
  return SUCCESS;
}
Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> &inputs) {
  auto &input_index_mapping = graph_item_->GetInputIndexMapping();
  for (size_t i = 0; i < input_index_mapping.size(); ++i) {
    auto &parent_input_index = input_index_mapping[i];
    if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
      GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph"
             " which needs at least [%d] inputs", graph_item_->GetName().c_str(), inputs.size(),
             parent_input_index + 1);
      REPORT_INNER_ERROR("E19999", "[%s] Number of inputs [%zu] is not sufficient for subgraph"
                         " which needs at least [%d] inputs",
                         graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
      return INTERNAL_ERROR;
    }

    auto &input_tensor = inputs[parent_input_index];
    subgraph_context_->SetInput(static_cast<int>(i), input_tensor);
    GELOGD("[%s] Set input tensor[%zu] to inputs with index = %d, tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           parent_input_index,
           input_tensor.DebugString().c_str());
  }

  return SUCCESS;
}
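
// Entry points for running the subgraph. The ExecuteAsync overloads bind the
// inputs (and, optionally, user-provided outputs for zero copy), then either
// execute the single node of a known-shape subgraph directly or drive the
// dynamic-shape prepare/schedule/launch pipeline via ScheduleTasks.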
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc,
                                      const std::vector<TensorValue> &outputs) {
  GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[Invoke][Init] failed for [%s].", graph_item_->GetName().c_str());
  if (!outputs.empty()) {
    GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
                      "[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
  }
  if (!graph_item_->IsDynamic()) {
    return ExecuteAsyncForKnownShape(inputs);
  }

  HYBRID_CHK_STATUS_RET(ScheduleTasks(), "[%s] Failed to execute tasks.", graph_item_->GetName().c_str());
  GELOGD("[%s] Done executing subgraph successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}

Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
  return ExecuteAsync(inputs, input_desc, {});
}

Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
  GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
  if (graph_item_->GetAllNodes().size() != 1) {
    REPORT_INNER_ERROR("E19999", "[%s] Invalid known shape subgraph. node size = %zu",
                       graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size());
    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Invalid known shape subgraph. node size = %zu",
           graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size());
    return INTERNAL_ERROR;
  }

  auto node_item = graph_item_->GetAllNodes()[0];
  GE_CHECK_NOTNULL(node_item);
  auto node_state = subgraph_context_->GetOrCreateNodeState(node_item);
  GE_CHECK_NOTNULL(node_state);
  node_state->SetKernelTask(node_item->kernel_task);

  std::function<void()> callback;
  GE_CHK_STATUS_RET_NOLOG(InitCallback(node_state.get(), callback));
  HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, node_state->GetTaskContext(), *context_, callback),
                        "[%s] Failed to execute node [%s] for known subgraph.",
                        graph_item_->GetName().c_str(),
                        node_state->GetName().c_str());
  GELOGD("[%s] Done executing non-dynamic subgraph successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}

Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
  std::vector<TensorValue> inputs;
  std::vector<ConstGeTensorDescPtr> input_desc;
  for (int i = 0; i < task_context.NumInputs(); ++i) {
    auto tensor = task_context.GetInput(i);
    GE_CHECK_NOTNULL(tensor);
    inputs.emplace_back(*tensor);
    input_desc.emplace_back(task_context.GetInputDesc(i));
  }

  GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), "[Invoke][ExecuteAsync] failed for [%s].",
                    graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context),
                    "[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
                    graph_item_->GetName().c_str());
  return SUCCESS;
}
BlockingQueue<const NodeItem *> &SubgraphExecutor::GetPrepareQueue(int group) {
  std::lock_guard<std::mutex> lk(mu_);
  return prepare_queues_[group];
}

Status SubgraphExecutor::NodeEnqueue(NodeState *node_state) {
  if (!ready_queue_.Push(node_state)) {
    if (context_->is_eos_) {
      GELOGD("Got end of sequence");
      return SUCCESS;
    }
    GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. Quit from preparing nodes.",
           graph_item_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. Quit from preparing nodes.",
                       graph_item_->GetName().c_str());
    return INTERNAL_ERROR;
  }

  GELOGD("[%s] Push node [%s] to queue.", graph_item_->GetName().c_str(), node_state->GetName().c_str());
  return SUCCESS;
}
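
// Prepares a single node. While-op bodies may force static nodes to dynamic so
// their shapes are re-inferred on every iteration. Dynamic nodes get shape
// inference and compilation submitted to the pre-run thread pool; static nodes
// reuse the precompiled kernel task, compiling on the spot only if it is missing.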
Status SubgraphExecutor::PrepareNode(const NodeItem &node_item, int group) {
  GELOGD("[%s] Start to prepare node [%s].", graph_item_->GetName().c_str(), node_item.NodeName().c_str());
  // For while op
  if (force_infer_shape_ && !node_item.is_dynamic) {
    GELOGD("[%s] Force infer shape is set, updating node to dynamic.", node_item.NodeName().c_str());
    auto &mutable_node_item = const_cast<NodeItem &>(node_item);
    mutable_node_item.SetToDynamic();
  }

  auto node_state = subgraph_context_->GetOrCreateNodeState(&node_item);
  GE_CHECK_NOTNULL(node_state);
  auto p_node_state = node_state.get();

  if (node_item.node_type == NETOUTPUT) {
    GE_CHK_STATUS_RET_NOLOG(NodeEnqueue(p_node_state));
    return AfterPrepared(p_node_state);
  }

  // Only do shape inference and compilation for nodes with dynamic shapes.
  if (node_item.is_dynamic) {
    GE_CHECK_NOTNULL(pre_run_pool_);
    auto prepare_future = pre_run_pool_->commit([this, p_node_state]() -> Status {
      GetContext().SetSessionId(context_->session_id);
      GetContext().SetContextId(context_->context_id);
      GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
      GE_CHK_STATUS_RET_NOLOG(PrepareForExecution(context_, *p_node_state));
      return AfterPrepared(p_node_state);
    });

    p_node_state->SetPrepareFuture(std::move(prepare_future));
    return NodeEnqueue(p_node_state);
  } else {
    GELOGD("[%s] Skipping shape inference and compilation for node with static shape.",
           node_item.NodeName().c_str());
    if (node_item.kernel_task == nullptr) {
      GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
      GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
                        "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
    } else {
      node_state->SetKernelTask(node_item.kernel_task);
    }

    const auto &task = node_state->GetKernelTask();
    if (task == nullptr) {
      GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for [%s], NodeTask is null.", node_state->GetName().c_str());
      REPORT_CALL_ERROR("E19999", "GetKernelTask failed for %s, NodeTask is null.", node_state->GetName().c_str());
      return INTERNAL_ERROR;
    }

    GE_CHK_STATUS_RET_NOLOG(NodeEnqueue(p_node_state));
    return AfterPrepared(p_node_state);
  }
}
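
// Producer side of the execution pipeline: walks the nodes of the given group
// in topological order and feeds them into ready_queue_. Graphs without
// control flow iterate all nodes directly; graphs with control flow start from
// the root nodes and pull follow-up nodes from the per-group prepare queue as
// their predecessors get scheduled.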
Status SubgraphExecutor::PrepareNodes(int group) {
  const size_t node_size = graph_item_->GetNodeSize(group);
  GELOGD("[%s] Start to prepare nodes. group = %d, size = %zu", graph_item_->GetName().c_str(), group, node_size);
  if (!graph_item_->HasCtrlFlowOp()) {
    for (const auto &node_item : graph_item_->GetAllNodes(group)) {
      RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start");
      GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str());
      RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End");
    }
    GELOGD("[%s] Done preparing nodes successfully.", graph_item_->GetName().c_str());
    return SUCCESS;
  }

  // Initialize the ready queue
  size_t node_count = 0;
  bool node_complete = false;
  for (const auto &node_item : graph_item_->GetRootNodes(group)) {
    RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start");
    GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str());
    RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End");
    node_complete = node_item->NodeType() == NETOUTPUT;
    node_count++;
  }

  GELOGD("[%s] Done preparing root nodes.", graph_item_->GetName().c_str());
  BlockingQueue<const NodeItem *> &prepare_queue = GetPrepareQueue(group);
  while (((group != -1) && (node_count < node_size)) || ((group == -1) && !node_complete)) {
    const NodeItem *node_item = nullptr;
    if (!prepare_queue.Pop(node_item)) {
      if (context_->is_eos_) {
        GELOGD("[%s] Got end of sequence.", graph_item_->GetName().c_str());
        break;
      }
      if (context_->GetStatus() != SUCCESS) {
        GELOGD("[%s] Graph execution failed.", graph_item_->GetName().c_str());
        return SUCCESS;
      }
      GELOGE(INTERNAL_ERROR, "[%s] failed to pop node.", graph_item_->GetName().c_str());
      return INTERNAL_ERROR;
    }

    if (node_item == nullptr) {
      GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str());
      break;
    }

    RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] Start");
    GE_CHK_STATUS_RET(PrepareNode(*node_item, group), "[%s] failed to prepare task.", node_item->NodeName().c_str());
    RECORD_EXECUTION_EVENT(context_, node_item->NodeName().c_str(), "[PrepareNode] End");
    node_complete = node_item->NodeType() == NETOUTPUT;
    node_count++;
  }

  GELOGD("[%s] Done preparing nodes successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}
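
// After a node is scheduled, notifies its data/control successors: each
// successor that becomes ready is pushed to the matching prepare queue so that
// PrepareNodes can pick it up. The notification runs on the pre-run thread
// pool; ScheduleNodes later waits on the returned future.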
Status SubgraphExecutor::NodeScheduled(NodeState *node_state) {
  GELOGD("Graph[%s] After [%s] scheduled, data size: %zu, ctrl size: %zu, switch index: %d, merge index: %d",
         graph_item_->GetName().c_str(), node_state->GetName().c_str(),
         node_state->GetNodeItem()->data_send_.size(), node_state->GetNodeItem()->ctrl_send_.size(),
         node_state->GetSwitchIndex(), node_state->GetMergeIndex());

  GE_CHECK_NOTNULL(pre_run_pool_);
  auto future = pre_run_pool_->commit([this, node_state]() -> Status {
    RECORD_CALLBACK_EVENT(context_, node_state->GetName().c_str(), "[NodeScheduled] Start");
    std::function<void(const NodeItem *)> callback = [&](const NodeItem *node_item) {
      const auto &node_name = node_item->node_name;
      int group = (node_state->GetGroup() != -1) ? node_item->group : -1;
      GELOGI("After [%s] scheduled, [%s] is ready for prepare.", node_state->GetName().c_str(), node_name.c_str());
      BlockingQueue<const NodeItem *> &prepare_queue = GetPrepareQueue(group);
      if (!prepare_queue.Push(node_item)) {
        if (!context_->is_eos_) {
          GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurred while pushing to queue.",
                 graph_item_->GetName().c_str());
          REPORT_INNER_ERROR("E19999", "[%s] Error occurred while pushing to queue.", graph_item_->GetName().c_str());
        }
      }
    };

    GE_CHK_STATUS_RET_NOLOG(node_state->NodeScheduled(callback));
    RECORD_CALLBACK_EVENT(context_, node_state->GetName().c_str(), "[NodeScheduled] End");
    return SUCCESS;
  });

  node_state->SetScheduleFuture(std::move(future));
  if (schedule_queue_.Push(node_state)) {
    return SUCCESS;
  }

  if (context_->is_eos_) {
    GELOGD("[%s] Got end of sequence", graph_item_->GetName().c_str());
    return SUCCESS;
  }

  GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurred while pushing to queue.", graph_item_->GetName().c_str());
  REPORT_INNER_ERROR("E19999", "[%s] Error occurred while pushing to queue.", graph_item_->GetName().c_str());
  return INTERNAL_ERROR;
}
Status SubgraphExecutor::AfterPrepared(NodeState *node_state) {
  if (!graph_item_->HasCtrlFlowOp()) {
    return SUCCESS;
  }
  if (node_state->IsShapeDependence()) {
    return SUCCESS;
  }

  // Not a control flow node, propagate state.
  return NodeScheduled(node_state);
}

void SubgraphExecutor::AfterExecuted(NodeState *node_state) {
  if (!node_state->IsShapeDependence()) {
    return;
  }

  // For control flow nodes, propagate state.
  auto error = NodeScheduled(node_state);
  if (error != SUCCESS) {
    auto task_context = node_state->GetTaskContext();
    task_context->OnError(error);
  }
}

void SubgraphExecutor::OnNodeDone(NodeState *node_state) {
  auto task_context = node_state->GetTaskContext();
  NodeDoneCallback cb(context_, task_context);
  auto error = cb.OnNodeDone();
  if (error != SUCCESS) {
    task_context->OnError(error);
  }

  if (node_state->IsShapeDependence() && graph_item_->HasCtrlFlowOp()) {
    AfterExecuted(node_state);
  }
}

Status SubgraphExecutor::InitCallback(NodeState *node_state, std::function<void()> &callback) {
  auto task_context = node_state->GetTaskContext();
  GE_CHECK_NOTNULL(task_context);
  if (task_context->NeedCallback()) {
    callback = std::bind(&SubgraphExecutor::OnNodeDone, this, node_state);
  } else if (node_state->IsShapeDependence() && graph_item_->HasCtrlFlowOp()) {
    callback = std::bind(&SubgraphExecutor::AfterExecuted, this, node_state);
  }

  return SUCCESS;
}
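
// Drains schedule_queue_ and waits for each node's scheduling future, so that
// successor-notification errors surface on this thread. A nullptr entry marks
// end of input; EOS and already-recorded execution errors end the loop quietly.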
Status SubgraphExecutor::ScheduleNodes() {
  GELOGD("[%s] Start to schedule nodes.", graph_item_->GetName().c_str());
  while (true) {
    NodeState *node_state = nullptr;
    if (!schedule_queue_.Pop(node_state)) {
      if (context_->is_eos_) {
        GELOGD("[%s] Got end of sequence.", graph_item_->GetName().c_str());
        break;
      }
      if (context_->GetStatus() != SUCCESS) {
        GELOGD("[%s] Graph execution failed.", graph_item_->GetName().c_str());
        return SUCCESS;
      }
      GELOGE(INTERNAL_ERROR, "[%s] failed to pop node.", graph_item_->GetName().c_str());
      return INTERNAL_ERROR;
    }

    if (node_state == nullptr) {
      GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str());
      break;
    }

    GE_CHK_STATUS_RET_NOLOG(node_state->WaitForScheduleDone());
  }

  GELOGD("[%s] Done scheduling nodes successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}
Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
                        "[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
                        "[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
  return SUCCESS;
}
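
// Makes a node executable: reuses the precompiled kernel task or compiles one
// on demand, then updates tiling data on the current rt context before
// workspaces are allocated.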
Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state) {
  auto &node_item = *node_state.GetNodeItem();
  if (node_item.kernel_task == nullptr) {
    GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx),
                      "[Invoke][Compile] Failed for node [%s]", node_state.GetName().c_str());
  } else {
    node_state.SetKernelTask(node_item.kernel_task);
  }

  const auto &task = node_state.GetKernelTask();
  if (task == nullptr) {
    GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for [%s], NodeTask is null.", node_state.GetName().c_str());
    REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null.", node_state.GetName().c_str());
    return INTERNAL_ERROR;
  }

  GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context));
  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start");
  // Update op_desc before allocating workspaces.
  GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*node_state.GetTaskContext()));
  RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end");
  return SUCCESS;
}
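
// Consumer side of the pipeline: pops prepared nodes from ready_queue_ and
// launches them through the ExecutionEngine. The NETOUTPUT node is not
// launched; the loop only waits until all of its inputs become valid.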
Status SubgraphExecutor::LaunchTasks() {
  while (true) {
    NodeState *node_state = nullptr;
    if (!ready_queue_.Pop(node_state)) {
      GELOGE(INTERNAL_ERROR, "[Invoke][Pop] failed for [%s].", graph_item_->GetName().c_str());
      REPORT_CALL_ERROR("E19999", "invoke pop failed for %s.", graph_item_->GetName().c_str());
      return INTERNAL_ERROR;
    }

    if (node_state == nullptr) {
      GELOGD("[%s] Got EOF from queue.", graph_item_->GetName().c_str());
      return SUCCESS;
    }

    if (node_state->GetType() == NETOUTPUT) {
      // Wait for all inputs to become valid.
      // After PrepareNodes returns, all output tensors and shapes are valid.
      GE_CHK_STATUS_RET_NOLOG(node_state->GetShapeInferenceState().AwaitShapesReady(*context_));
      GE_CHK_STATUS_RET_NOLOG(node_state->AwaitInputTensors(*context_));
      GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
      continue;
    }

    GE_CHK_STATUS_RET_NOLOG(node_state->WaitForPrepareDone());

    GELOGD("[%s] Start to execute.", node_state->GetName().c_str());
    auto shared_task_context = node_state->GetTaskContext();
    GE_CHECK_NOTNULL(shared_task_context);
    shared_task_context->SetForceInferShape(force_infer_shape_);

    std::function<void()> callback;
    GE_CHK_STATUS_RET_NOLOG(InitCallback(node_state, callback));
    HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_, callback),
                          "[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
    GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
  }
}
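
// Orchestrates one run of the pipeline: node preparation and successor
// scheduling run on std::async workers while this thread launches tasks. On
// failure, all queues are stopped and both workers are joined before the error
// is returned.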
Status SubgraphExecutor::ScheduleTasks(int group) {
  GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str());
  subgraph_context_->SetGroup(group);
  auto prepare_future = std::async(std::launch::async, [&]() -> Status {
    GetContext().SetSessionId(context_->session_id);
    GetContext().SetContextId(context_->context_id);
    auto ret = PrepareNodes(group);
    ready_queue_.Push(nullptr);
    schedule_queue_.Push(nullptr);
    for (auto &item : prepare_queues_) {
      item.second.Push(nullptr);
    }
    return ret;
  });

  auto schedule_future = std::async(std::launch::async, [&]() -> Status {
    return ScheduleNodes();
  });

  GELOGD("[%s] Start to execute subgraph.", graph_item_->GetName().c_str());
  auto ret = LaunchTasks();
  if (ret != SUCCESS) {
    subgraph_context_->OnError(ret);
    context_->SetErrorCode(ret);
    ready_queue_.Stop();
    schedule_queue_.Stop();
    for (auto &item : prepare_queues_) {
      item.second.Stop();
    }
    prepare_future.wait();
    schedule_future.wait();
    return ret;
  }

  GE_CHK_STATUS_RET(prepare_future.get(), "[Invoke][get] [%s] Error occurred in task preparation.",
                    graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(schedule_future.get(), "[Invoke][get] [%s] Error occurred in task scheduling.",
                    graph_item_->GetName().c_str());
  GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}
Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs) {
  return subgraph_context_->GetOutputs(outputs);
}

Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc) {
  GE_CHK_STATUS_RET(GetOutputs(outputs), "[Invoke][GetOutputs] failed for [%s].", graph_item_->GetName().c_str());

  // Copy output data from op to designated position.
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc),
                    "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.",
                    graph_item_->GetName().c_str());
  if (outputs.size() != output_desc.size()) {
    GELOGE(INTERNAL_ERROR, "[Check][Size]Number of outputs (%zu) mismatches number of output_desc (%zu).",
           outputs.size(), output_desc.size());
    REPORT_INNER_ERROR("E19999", "Number of outputs (%zu) mismatches number of output_desc (%zu).",
                       outputs.size(), output_desc.size());
    return INTERNAL_ERROR;
  }
  return SUCCESS;
}
Status SubgraphExecutor::Synchronize() {
  GELOGD("[%s] Synchronize start.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET_NOLOG(context_->Synchronize(context_->stream));
  GELOGD("[%s] Done synchronizing successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
}
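
// Copies subgraph outputs (tensors and shapes) back into the parent node's
// TaskContext. Shapes must be updated even for known-shape subgraphs, since
// e.g. an If op may have two known-shape branches with different output shapes.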
Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
  // Get output tensors and the tensor desc list.
  std::vector<TensorValue> outputs;
  std::vector<ConstGeTensorDescPtr> output_desc_list;
  GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), "[Invoke][GetOutputs][%s] Failed to get output tensors.",
                    graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list),
                    "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.",
                    graph_item_->GetName().c_str());

  if (outputs.size() != output_desc_list.size()) {
    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] num of output tensors = %zu and num of output tensor desc = %zu not equal",
           graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
    REPORT_INNER_ERROR("E19999", "%s num of output tensors = %zu and num of output tensor desc = %zu not equal",
                       graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
    return INTERNAL_ERROR;
  }

  // Map to the parent task context.
  for (size_t i = 0; i < outputs.size(); ++i) {
    int parent_output_index = graph_item_->GetParentOutputIndex(i);
    GE_CHECK_GE(parent_output_index, 0);
    // Update tensor.
    GELOGD("[%s] Updating output[%zu] to parent output[%d]",
           graph_item_->GetName().c_str(),
           i,
           parent_output_index);

    GELOGD("[%s] Updating output tensor, index = %d, tensor = %s",
           graph_item_->GetName().c_str(),
           parent_output_index,
           outputs[i].DebugString().c_str());
    GE_CHK_STATUS_RET(task_context.SetOutput(parent_output_index, outputs[i]));

    // Update shapes; dynamic format/dtype is not supported.
    // Note that even if the subgraph is of known shape, the parent output desc still has to be updated;
    // for instance, an If op may have two known-shape subgraphs with different output shapes.
    const auto &output_desc = output_desc_list[i];
    auto parent_output_desc = task_context.MutableOutputDesc(parent_output_index);
    GE_CHECK_NOTNULL(parent_output_desc);
    GELOGD("[%s] Updating output shape[%d] from [%s] to [%s]",
           graph_item_->GetName().c_str(),
           parent_output_index,
           parent_output_desc->MutableShape().ToString().c_str(),
           output_desc->GetShape().ToString().c_str());
    parent_output_desc->SetShape(output_desc->GetShape());

    GELOGD("[%s] Updating output original shape[%d] from [%s] to [%s]",
           graph_item_->GetName().c_str(),
           parent_output_index,
           parent_output_desc->GetOriginShape().ToString().c_str(),
           output_desc->GetOriginShape().ToString().c_str());
    parent_output_desc->SetOriginShape(output_desc->GetOriginShape());
  }

  return SUCCESS;
}
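
// Lets user-provided output buffers be written directly by the producing ops:
// each given tensor is bound to the op output that feeds the NetOutput node,
// avoiding a final copy.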
Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> NetOutput: set the output tensor of the op that outputs to the NetOutput node.
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "[Check][Size]Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(), outputs.size());
    REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu",
                       output_edges.size(), outputs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    auto &output_tensor = outputs[i];
    auto &output_node = output_edges[i].first;
    int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[Invoke][SetOutput][%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(), i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
}
Status SubgraphExecutor::PartialExecuteAsync(int task_group) {
  return ScheduleTasks(task_group);
}
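
// Re-initializes the executor between partial executions: the existing
// subgraph context is reset and only the inputs are rebound, instead of
// rebuilding the whole executor state.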
Status SubgraphExecutor::InitForPartialExecution(const vector<TensorValue> &inputs,
                                                 const vector<ConstGeTensorDescPtr> &input_desc) {
  if (subgraph_context_ == nullptr) {
    return Init(inputs, input_desc);
  }
  subgraph_context_->Reset();
  if (graph_item_->IsDynamic()) {
    GE_CHK_STATUS_RET(InitInputsForUnknownShape(inputs, input_desc),
                      "[%s] Failed to set inputs.",
                      graph_item_->GetName().c_str());
  } else {
    GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
                      "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph.",
                      graph_item_->GetName().c_str());
  }

  return SUCCESS;
}
}  // namespace hybrid
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware, serving as the bridge between them. GE takes the graph issued by ME as input, applies a series of deep graph optimizations, and outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training/inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; a detailed architecture diagram is shown below.