You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

memcpy_addr_async_pass.cc 23 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/passes/memcpy_addr_async_pass.h"
  17. #include "common/ge/ge_util.h"
  18. #include "framework/common/debug/log.h"
  19. #include "graph/utils/node_utils.h"
  20. #include "graph/utils/op_desc_utils.h"
  21. #include "graph/utils/tensor_utils.h"
  22. namespace ge {
  23. Status MemcpyAddrAsyncPass::Run(ComputeGraphPtr graph) {
  24. GE_CHECK_NOTNULL(graph);
  25. if (graph->GetGraphUnknownFlag()) {
  26. for (const auto &node : graph->GetAllNodes()) {
  27. if (node->GetType() == STREAMSWITCH) {
  28. auto sub_graph = node->GetOwnerComputeGraph();
  29. if (sub_graph != nullptr && !sub_graph->GetGraphUnknownFlag()) {
  30. GE_CHK_STATUS_RET(AddMemcpyAsyncNode(node),
  31. "[Add][MemcpyAsyncNode] for node:%s in known subgraph:%s failed.",
  32. node->GetName().c_str(), sub_graph->GetName().c_str());
  33. }
  34. }
  35. }
  36. GELOGD("Graph[%s] is unknown graph, skip.", graph->GetName().c_str());
  37. return SUCCESS;
  38. }
  39. int64_t value = 0;
  40. rtError_t rt_ret = rtGetRtCapability(FEATURE_TYPE_MEMCPY, MEMCPY_INFO_SUPPORT_ZEROCOPY, &value);
  41. if (rt_ret != RT_ERROR_NONE) {
  42. REPORT_CALL_ERROR("E19999", "Call rtGetRtCapability failed, ret = 0x%X", rt_ret);
  43. GELOGE(RT_FAILED, "[Call][RtGetRtCapability] failed, ret = 0x%x.", rt_ret);
  44. return RT_FAILED;
  45. }
  46. if (value == RT_CAPABILITY_NOT_SUPPORT) {
  47. GELOGW("Not support zero copy, skip it.");
  48. return SUCCESS;
  49. }
  50. for (auto &node : graph->GetAllNodes()) {
  51. auto op_desc = node->GetOpDesc();
  52. GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
  53. if (op_desc->GetType() == STREAMSWITCHN || op_desc->GetType() == STREAMMERGE) {
  54. Status ret = AddMemcpyAddrAsyncNode(graph, node);
  55. if (ret != SUCCESS) {
  56. GELOGE(ret, "[Add][MemcpyAddrAsyncNode] for %s in graph:%s failed.", node->GetName().c_str(),
  57. graph->GetName().c_str());
  58. return ret;
  59. }
  60. }
  61. // handle data->netoutput, const->netoutput in root graph, use mem_addr_async to improve performance
  62. if (op_desc->GetType() == NETOUTPUT) {
  63. // check this netoutput is on root graph
  64. if (node->GetOwnerComputeGraph()->GetParentNode() == nullptr) {
  65. Status ret = InsertMemAddrAsyncNodeBeforeNetoutput(node->GetOwnerComputeGraph(), node);
  66. if (ret != SUCCESS) {
  67. GELOGE(ret, "[Insert][MemAddrAsyncNode] Before Netoutput for node:%s in graph:%s failed.",
  68. node->GetName().c_str(), graph->GetName().c_str());
  69. return ret;
  70. }
  71. }
  72. }
  73. }
  74. return SUCCESS;
  75. }
  76. Status MemcpyAddrAsyncPass::AddMemcpyAsyncNode(const NodePtr &node) {
  77. GE_CHECK_NOTNULL(node);
  78. GELOGI("Start add memcpyasync node in front of node %s", node->GetName().c_str());
  79. known_sub_graph_ = true;
  80. auto sub_graph = node->GetOwnerComputeGraph();
  81. for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
  82. OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  83. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  84. auto memcpy_async_node = CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor, node);
  85. if (memcpy_async_node == nullptr) {
  86. GELOGE(INTERNAL_ERROR, "[Create][MemcpyAddrAsyncNode] for node:%s in subgraph failed.",
  87. node->GetName().c_str());
  88. return INTERNAL_ERROR;
  89. }
  90. Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor, in_data_anchor, memcpy_async_node);
  91. if (ret != SUCCESS) {
  92. GELOGE(ret, "[Insert][MemcpyAddrAsyncNode] failed, memcpy_async_node:%s.", memcpy_async_node->GetName().c_str());
  93. return ret;
  94. }
  95. }
  96. return SUCCESS;
  97. }
  98. Status MemcpyAddrAsyncPass::AddMemcpyAddrAsyncNode(const ComputeGraphPtr &graph, const NodePtr &node) {
  99. GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str());
  100. for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
  101. OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  102. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  103. NodePtr in_node = peer_out_anchor->GetOwnerNode();
  104. if (in_node->GetType() == DATA) {
  105. ComputeGraphPtr owner_graph = in_node->GetOwnerComputeGraph();
  106. GE_CHECK_NOTNULL(owner_graph);
  107. // Data is in parent_graph
  108. if (owner_graph->GetParentGraph() == nullptr) {
  109. GELOGI("Need to insert MemcpyAddrAsync directly when data in parent graph.");
  110. NodePtr memcpy_addr_async_node = CreateMemcpyAddrAsyncNode(graph, peer_out_anchor, node);
  111. GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr,
  112. GELOGE(INTERNAL_ERROR, "[Create][MemcpyAddrAsyncNode] failed, node:%s.",
  113. node->GetName().c_str());
  114. return INTERNAL_ERROR);
  115. Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor, in_data_anchor, memcpy_addr_async_node);
  116. GE_IF_BOOL_EXEC(ret != SUCCESS,
  117. GELOGE(ret, "[Insert][MemcpyAddrAsyncNode] failed, memcpy_addr_async_node:%s.",
  118. memcpy_addr_async_node->GetName().c_str());
  119. return ret);
  120. } else {
  121. uint32_t parent_index = 0;
  122. if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  123. REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  124. in_node->GetName().c_str(), in_node->GetType().c_str());
  125. GELOGE(INTERNAL_ERROR, "[Get][Attr] %s from op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  126. in_node->GetName().c_str(), in_node->GetType().c_str());
  127. return INTERNAL_ERROR;
  128. }
  129. // Data is in sub_graph
  130. GELOGI("Need to find data in parent graph, then insert MemcpyAddrAsync.");
  131. NodePtr parent_node = owner_graph->GetParentNode();
  132. user_data_for_known_ = in_node;
  133. out_of_user_data_for_known_ = node;
  134. peer_out_anchor_for_known_ = peer_out_anchor;
  135. in_anchor_for_known_ = in_data_anchor;
  136. FindUserData(parent_node, parent_index);
  137. if (find_user_data_) {
  138. GELOGI("Insert memcpy_addr_async for non_dynamic.");
  139. GE_CHECK_NOTNULL(peer_out_anchor_);
  140. NodePtr memcpy_addr_async_node = CreateMemcpyAddrAsyncNode(graph, peer_out_anchor_, out_of_user_data_);
  141. GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr,
  142. GELOGE(INTERNAL_ERROR, "[Create][MemcpyAddrAsyncNode] failed, out_of_user_data_:%s.",
  143. out_of_user_data_->GetName().c_str());
  144. return INTERNAL_ERROR);
  145. Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor_, in_anchor_, memcpy_addr_async_node);
  146. GE_IF_BOOL_EXEC(ret != SUCCESS,
  147. GELOGE(ret, "[Insert][MemcpyAddrAsyncNode] failed, memcpy_addr_async_node:%s.",
  148. memcpy_addr_async_node->GetName().c_str());
  149. return ret);
  150. }
  151. if (find_user_data_for_known_) {
  152. GELOGI("Insert memcpy_addr_async for known graph.");
  153. auto sub_graph = user_data_for_known_->GetOwnerComputeGraph();
  154. NodePtr memcpy_addr_async_node =
  155. CreateMemcpyAddrAsyncNode(sub_graph, peer_out_anchor_for_known_, out_of_user_data_for_known_);
  156. GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr,
  157. GELOGE(INTERNAL_ERROR,
  158. "[Create][MemcpyAddrAsyncNode] for known failed, out_of_user_data_for_known_:%s",
  159. out_of_user_data_for_known_->GetName().c_str());
  160. return INTERNAL_ERROR);
  161. Status ret =
  162. InsertMemcpyAddrAsyncNode(peer_out_anchor_for_known_, in_anchor_for_known_, memcpy_addr_async_node);
  163. GE_IF_BOOL_EXEC(ret != SUCCESS,
  164. GELOGE(ret, "[Insert][MemcpyAddrAsyncNode] for known failed, memcpy_addr_async_node:%s.",
  165. memcpy_addr_async_node->GetName().c_str());
  166. return ret);
  167. }
  168. }
  169. }
  170. }
  171. return SUCCESS;
  172. }
  173. void MemcpyAddrAsyncPass::FindUserDataForKnown(const NodePtr &parent_node, uint32_t &parent_index) {
  174. GELOGI("Start FindUserDataForKnown of %s.", parent_node->GetName().c_str());
  175. if (user_data_for_known_->GetOpDesc() == nullptr) {
  176. GELOGI("Cannot get op_desc of %s.", user_data_for_known_->GetName().c_str());
  177. return;
  178. }
  179. string src_var_name;
  180. if (ge::AttrUtils::GetStr(user_data_for_known_->GetOpDesc(), REF_VAR_SRC_VAR_NAME, src_var_name)) {
  181. GELOGI("The data in known graph is variable, no need to insert memcpy_addr_async.");
  182. find_user_data_for_known_ = false;
  183. return;
  184. } else {
  185. find_user_data_for_known_ = true;
  186. }
  187. }
  188. void MemcpyAddrAsyncPass::FindUserDataForNonDynamic(const ge::NodePtr &parent_node, uint32_t &parent_index) {
  189. GELOGI("Start to FindUserDataForNonDynamic of %s.", parent_node->GetName().c_str());
  190. InDataAnchorPtr in_data_anchor = parent_node->GetInDataAnchor(parent_index);
  191. OutDataAnchorPtr out_anchor = in_data_anchor->GetPeerOutAnchor();
  192. GE_IF_BOOL_EXEC(out_anchor == nullptr,
  193. REPORT_INNER_ERROR("E19999", "Index:%u in data node of op:%s(%s) not exist, check invalid",
  194. parent_index, parent_node->GetName().c_str(), parent_node->GetType().c_str());
  195. GELOGE(INTERNAL_ERROR, "[Get][PeerOutAnchor] Index:%u in data node of op:%s(%s) not exist",
  196. parent_index, parent_node->GetName().c_str(), parent_node->GetType().c_str());
  197. return);
  198. NodePtr in_node = out_anchor->GetOwnerNode();
  199. GELOGI("in_node of parent_node is %s.", in_node->GetName().c_str());
  200. if (in_node->GetType() == DATA) {
  201. if (in_node->GetOwnerComputeGraph()->GetParentGraph() != nullptr) {
  202. // DATA is in sub graph again, update user_data of known firstly
  203. user_data_for_known_ = in_node;
  204. out_of_user_data_for_known_ = parent_node;
  205. peer_out_anchor_for_known_ = out_anchor;
  206. in_anchor_for_known_ = in_data_anchor;
  207. NodePtr pre_in_node = in_node->GetOwnerComputeGraph()->GetParentNode();
  208. if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  209. REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  210. in_node->GetName().c_str(), in_node->GetType().c_str());
  211. GELOGE(INTERNAL_ERROR, "[Set][Attr] %s to op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  212. in_node->GetName().c_str(), in_node->GetType().c_str());
  213. return;
  214. }
  215. FindUserData(pre_in_node, parent_index);
  216. } else {
  217. // DATA is in parent graph and not has input
  218. user_data_ = in_node;
  219. out_of_user_data_ = parent_node;
  220. peer_out_anchor_ = out_anchor;
  221. in_anchor_ = in_data_anchor;
  222. find_user_data_ = true;
  223. GELOGI("%s connect with %s, will insert memcpyaddr.", user_data_->GetName().c_str(),
  224. out_of_user_data_->GetName().c_str());
  225. }
  226. } else if (in_node->GetType() == IF || in_node->GetType() == WHILE || in_node->GetType() == CASE) {
  227. if (!AttrUtils::GetInt(parent_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  228. REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  229. parent_node->GetName().c_str(), parent_node->GetType().c_str());
  230. GELOGE(INTERNAL_ERROR, "[Get][Attr] %s from op:%s(%s) failed", ATTR_NAME_PARENT_NODE_INDEX.c_str(),
  231. parent_node->GetName().c_str(), parent_node->GetType().c_str());
  232. return;
  233. }
  234. FindUserData(in_node, parent_index);
  235. } else {
  236. GELOGI("%s connect with %s, which is not user_data.", parent_node->GetName().c_str(), in_node->GetName().c_str());
  237. find_user_data_ = false;
  238. }
  239. }
  240. void MemcpyAddrAsyncPass::FindUserData(const NodePtr &parent_node, uint32_t &parent_index) {
  241. auto parent_op_desc = parent_node->GetOpDesc();
  242. if (parent_op_desc == nullptr) {
  243. GELOGI("Cannot get op_desc of %s.", parent_node->GetName().c_str());
  244. return;
  245. }
  246. bool is_unknown_shape = false;
  247. if (parent_node->GetType() == PARTITIONEDCALL &&
  248. AttrUtils::GetBool(parent_op_desc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape) && !is_unknown_shape) {
  249. FindUserDataForKnown(parent_node, parent_index);
  250. } else {
  251. FindUserDataForNonDynamic(parent_node, parent_index);
  252. }
  253. }
  254. NodePtr MemcpyAddrAsyncPass::CreateMemcpyAddrAsyncNode(const ComputeGraphPtr &graph,
  255. const OutDataAnchorPtr &out_data_anchor,
  256. const NodePtr &out_of_user_data) {
  257. GELOGD("Start CreateMemcpyAddrAsyncNode.");
  258. static uint32_t new_node_index = 0;
  259. OpDescPtr pre_op_desc = out_data_anchor->GetOwnerNode()->GetOpDesc();
  260. GE_CHK_BOOL_EXEC(pre_op_desc != nullptr,
  261. REPORT_INNER_ERROR("E19999", "OpDesc in node is nullptr, check invalid");
  262. return nullptr, "[Get][OpDesc] failed, Op_desc of pre node is invalid.");
  263. OpDescPtr op_desc = nullptr;
  264. if (known_sub_graph_) { // insert memcpyasync node when known sub graph
  265. string node_name = pre_op_desc->GetName() + "_" + MEMCPYASYNC + "_" + std::to_string(new_node_index++);
  266. op_desc = MakeShared<OpDesc>(node_name, MEMCPYASYNC);
  267. } else {
  268. string node_name = pre_op_desc->GetName() + "_" + MEMCPYADDRASYNC + "_" + std::to_string(new_node_index++);
  269. op_desc = MakeShared<OpDesc>(node_name, MEMCPYADDRASYNC);
  270. }
  271. GE_CHECK_NOTNULL_EXEC(op_desc, REPORT_CALL_ERROR("E19999", "New OpDesc failed"); return nullptr);
  272. if (op_desc->AddInputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) {
  273. REPORT_CALL_ERROR("E19999", "Add input desc to op:%s(%s) failed",
  274. pre_op_desc->GetName().c_str(), pre_op_desc->GetType().c_str());
  275. GELOGE(INTERNAL_ERROR, "[Add][InputDesc] to op:%s(%s) failed",
  276. pre_op_desc->GetName().c_str(), pre_op_desc->GetType().c_str());
  277. return nullptr;
  278. }
  279. if (op_desc->AddOutputDesc(pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())) != GRAPH_SUCCESS) {
  280. REPORT_CALL_ERROR("E19999", "Add output desc to op:%s(%s) failed",
  281. pre_op_desc->GetName().c_str(), pre_op_desc->GetType().c_str());
  282. GELOGE(INTERNAL_ERROR, "[Add][OutputDesc] to op:%s(%s) failed",
  283. pre_op_desc->GetName().c_str(), pre_op_desc->GetType().c_str());
  284. return nullptr;
  285. }
  286. string stream_label;
  287. if (AttrUtils::GetStr(out_of_user_data->GetOpDesc(), ATTR_NAME_STREAM_LABEL, stream_label)) {
  288. (void)AttrUtils::SetStr(op_desc, ATTR_NAME_STREAM_LABEL, stream_label);
  289. GELOGD("Node %s set stream label: %s", op_desc->GetName().c_str(), stream_label.c_str());
  290. }
  291. bool rts_label_node = false;
  292. if (AttrUtils::GetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_RTS_LABEL_NODE, rts_label_node)) {
  293. (void)AttrUtils::SetBool(op_desc, ATTR_NAME_RTS_LABEL_NODE, rts_label_node);
  294. GELOGD("Node %s set rts label node attribute", op_desc->GetName().c_str());
  295. }
  296. bool labeled_input = false;
  297. (void)ge::AttrUtils::GetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, labeled_input);
  298. if (labeled_input) {
  299. if (!ge::AttrUtils::SetBool(out_of_user_data->GetOpDesc(), ATTR_NAME_NODE_CONNECT_INPUT, false)) {
  300. REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed", ATTR_NAME_NODE_CONNECT_INPUT.c_str(),
  301. out_of_user_data->GetName().c_str(), out_of_user_data->GetType().c_str());
  302. GELOGE(FAILED, "[Set][Attr] %s to op:%s(%s) failed", ATTR_NAME_NODE_CONNECT_INPUT.c_str(),
  303. out_of_user_data->GetName().c_str(), out_of_user_data->GetType().c_str());
  304. return nullptr;
  305. }
  306. if (!ge::AttrUtils::SetBool(op_desc, ATTR_NAME_NODE_CONNECT_INPUT, true)) {
  307. REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed", ATTR_NAME_NODE_CONNECT_INPUT.c_str(),
  308. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  309. GELOGE(FAILED, "[Set][Attr] %s to op:%s(%s) failed", ATTR_NAME_NODE_CONNECT_INPUT.c_str(),
  310. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  311. return nullptr;
  312. }
  313. }
  314. NodePtr memcpy_addr_async_node = graph->AddNode(op_desc);
  315. GE_CHECK_NOTNULL_EXEC(memcpy_addr_async_node,
  316. REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed",
  317. op_desc->GetName().c_str(), op_desc->GetType().c_str(),
  318. graph->GetName().c_str());
  319. return nullptr);
  320. return memcpy_addr_async_node;
  321. }
  322. Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &out_anchor,
  323. const InDataAnchorPtr &in_anchor, const NodePtr &node) {
  324. // insert memcpy_addr of each user_data and out_of_user_data
  325. if (GraphUtils::RemoveEdge(out_anchor, in_anchor) != GRAPH_SUCCESS) {
  326. REPORT_CALL_ERROR("E19999", "Remove edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed",
  327. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  328. out_anchor->GetIdx(), in_anchor->GetOwnerNode()->GetName().c_str(),
  329. in_anchor->GetOwnerNode()->GetType().c_str(), in_anchor->GetIdx());
  330. GELOGE(INTERNAL_ERROR, "[Remove][Edge] between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed",
  331. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  332. out_anchor->GetIdx(), in_anchor->GetOwnerNode()->GetName().c_str(),
  333. in_anchor->GetOwnerNode()->GetType().c_str(), in_anchor->GetIdx());
  334. return INTERNAL_ERROR;
  335. }
  336. if (GraphUtils::AddEdge(out_anchor, node->GetInDataAnchor(0)) != GRAPH_SUCCESS) {
  337. REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:%d) and op:%s(%s)(index:0) failed",
  338. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  339. out_anchor->GetIdx(), node->GetName().c_str(), node->GetType().c_str());
  340. GELOGE(INTERNAL_ERROR, "[Add][Edge] between op:%s(%s)(index:%d) and op:%s(%s)(index:0) failed",
  341. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  342. out_anchor->GetIdx(), node->GetName().c_str(), node->GetType().c_str());
  343. return INTERNAL_ERROR;
  344. }
  345. if (GraphUtils::AddEdge(node->GetOutDataAnchor(0), in_anchor) != GRAPH_SUCCESS) {
  346. REPORT_CALL_ERROR("E19999", "Add edge between op:%s(%s)(index:0) and op:%s(%s)(index:%d) failed",
  347. node->GetName().c_str(), node->GetType().c_str(), in_anchor->GetOwnerNode()->GetName().c_str(),
  348. in_anchor->GetOwnerNode()->GetType().c_str(), in_anchor->GetIdx());
  349. GELOGE(INTERNAL_ERROR, "[Add][Edge] between op:%s(%s)(index:0) and op:%s(%s)(index:%d) failed",
  350. node->GetName().c_str(), node->GetType().c_str(), in_anchor->GetOwnerNode()->GetName().c_str(),
  351. in_anchor->GetOwnerNode()->GetType().c_str(), in_anchor->GetIdx());
  352. return INTERNAL_ERROR;
  353. }
  354. return SUCCESS;
  355. }
  356. Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node) {
  357. GELOGD("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str());
  358. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  359. auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx());
  360. GE_CHECK_NOTNULL(in_node);
  361. auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  362. if ((in_node->GetType() != CONSTANT) &&
  363. (in_node->GetType() != CONSTANTOP) &&
  364. (in_node->GetType() != DATA)) {
  365. continue;
  366. }
  367. auto desc = in_node->GetOpDesc();
  368. GE_CHECK_NOTNULL(desc);
  369. if (IsEmptyTenor(desc->GetOutputDesc(peer_out_anchor->GetIdx()).GetShape())) {
  370. continue;
  371. }
  372. GELOGI("Need to insert MemcpyAddrAsync before netoutput on parent graph.");
  373. NodePtr memcpy_addr_async_node = CreateMemcpyAddrAsyncNode(graph, peer_out_anchor, in_node);
  374. GE_IF_BOOL_EXEC(memcpy_addr_async_node == nullptr,
  375. GELOGE(INTERNAL_ERROR, "[Create][MemcpyAddrAsyncNode] failed, in_node:%s.",
  376. in_node->GetName().c_str());
  377. return INTERNAL_ERROR);
  378. Status ret = InsertMemcpyAddrAsyncNode(peer_out_anchor, in_data_anchor, memcpy_addr_async_node);
  379. GE_IF_BOOL_EXEC(ret != SUCCESS,
  380. GELOGE(ret, "[Insert][MemcpyAddrAsyncNode] failed, memcpy_addr_async_node:%s.",
  381. memcpy_addr_async_node->GetName().c_str());
  382. return ret);
  383. GELOGI("Insert mem_addr_async node %s success between %s and %s.", memcpy_addr_async_node->GetName().c_str(),
  384. in_node->GetName().c_str(), node->GetName().c_str());
  385. // if src node is const, need to update attr and offset here because this pass process is after offset set.
  386. if ((in_node->GetType() == CONSTANT) || (in_node->GetType() == CONSTANTOP)) {
  387. NodeUtils::UpdateIsInputConst(memcpy_addr_async_node);
  388. auto output_desc = node->GetOpDesc();
  389. GE_CHECK_NOTNULL(output_desc);
  390. auto output_tensor_desc = output_desc->MutableInputDesc(static_cast<uint32_t>(in_data_anchor->GetIdx()));
  391. int64_t data_offset = 0;
  392. (void)TensorUtils::GetDataOffset(*output_tensor_desc, data_offset);
  393. auto input_tensor = memcpy_addr_async_node->GetOpDesc()->MutableInputDesc(0);
  394. GELOGI("Need update const Offset %ld to op [%s]", data_offset, memcpy_addr_async_node->GetName().c_str());
  395. TensorUtils::SetDataOffset(*input_tensor, data_offset);
  396. TensorUtils::SetDataOffset(*output_tensor_desc, 0);
  397. }
  398. }
  399. NodeUtils::UpdateIsInputConst(node);
  400. return SUCCESS;
  401. }
  402. bool MemcpyAddrAsyncPass::IsEmptyTenor(const GeShape &shape) const {
  403. for (const auto dim : shape.GetDims()) {
  404. if (dim == 0) {
  405. return true;
  406. }
  407. }
  408. return false;
  409. }
  410. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示