You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

next_iteration_pass.cc 17 kB

5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/passes/next_iteration_pass.h"
  17. #include "common/ge/ge_util.h"
  18. #include "graph/common/omg_util.h"
  19. #include "graph/utils/node_utils.h"
  20. using std::string;
  21. namespace ge {
  namespace {
  // Value stored in ATTR_NAME_STREAM_SWITCH_TYPE for the Switch nodes collected by FindWhileGroups.
  constexpr int64_t kLoopType = 1;
  }  // namespace
  25. Status NextIterationPass::Run(ComputeGraphPtr graph) {
  26. GELOGD("NextIterationPass Enter");
  27. /// Enter-----------+
  28. /// +-> Merge -> Switch <- LoopCond <- Cond
  29. /// NextIteration---+
  30. for (auto &node : graph->GetDirectNode()) {
  31. const std::string type = node->GetType();
  32. if ((type != ENTER) && (type != REFENTER)) {
  33. continue;
  34. }
  35. if (GroupEnterNode(node) != SUCCESS) {
  36. GELOGE(INTERNAL_ERROR, "[Group][EnterNode] %s failed.", node->GetName().c_str());
  37. return INTERNAL_ERROR;
  38. }
  39. }
  40. if (FindWhileGroups() != SUCCESS) {
  41. GELOGE(INTERNAL_ERROR, "[Find][WhileGroups] in graph:%s failed.", graph->GetName().c_str());
  42. return INTERNAL_ERROR;
  43. }
  44. if (!VerifyWhileGroup()) {
  45. GELOGE(INTERNAL_ERROR, "[Verify][WhileGroup] in graph:%s failed.", graph->GetName().c_str());
  46. return INTERNAL_ERROR;
  47. }
  48. if (HandleWhileGroup(graph) != SUCCESS) {
  49. GELOGE(FAILED, "[Handle][WhileGroup] in graph:%s failed.", graph->GetName().c_str());
  50. return FAILED;
  51. }
  52. GELOGD("NextIterationPass Leave");
  53. return SUCCESS;
  54. }
  55. ///
  56. /// @brief Group Enter node
  57. /// @param [in] enter_node
  58. /// @return Status
  59. ///
  60. Status NextIterationPass::GroupEnterNode(const NodePtr &enter_node) {
  61. OpDescPtr enter_desc = enter_node->GetOpDesc();
  62. GE_CHECK_NOTNULL(enter_desc);
  63. std::string frame_name;
  64. if (!ge::AttrUtils::GetStr(enter_desc, ENTER_ATTR_FRAME_NAME, frame_name) || frame_name.empty()) {
  65. REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) failed", ENTER_ATTR_FRAME_NAME.c_str(),
  66. enter_desc->GetName().c_str(), enter_desc->GetType().c_str());
  67. GELOGE(FAILED, "[Get][Attr] %s from op:%s(%s) failed", ENTER_ATTR_FRAME_NAME.c_str(),
  68. enter_desc->GetName().c_str(), enter_desc->GetType().c_str());
  69. return FAILED;
  70. }
  71. string batch_label;
  72. if (ge::AttrUtils::GetStr(enter_desc, ATTR_NAME_BATCH_LABEL, batch_label)) {
  73. frame_name += batch_label;
  74. }
  75. auto iter = loop_group_map_.find(frame_name);
  76. if (iter == loop_group_map_.end()) {
  77. LoopCondGroupPtr loop_group = MakeShared<LoopCondGroup>();
  78. if (loop_group == nullptr) {
  79. REPORT_CALL_ERROR("E19999", "New LoopCondGroup failed");
  80. GELOGE(FAILED, "[New][LoopCondGroup] failed.");
  81. return FAILED;
  82. }
  83. loop_group->enter_nodes.emplace_back(enter_node);
  84. loop_group_map_[frame_name] = loop_group;
  85. } else {
  86. iter->second->enter_nodes.emplace_back(enter_node);
  87. }
  88. return SUCCESS;
  89. }
  90. ///
  91. /// @brief Find while groups
  92. /// @return Status
  93. ///
  94. Status NextIterationPass::FindWhileGroups() {
  95. for (const auto &loop_group_iter : loop_group_map_) {
  96. const std::string &frame_name = loop_group_iter.first;
  97. for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
  98. for (const auto &out_node : enter_node->GetOutAllNodes()) {
  99. std::string type;
  100. GE_CHK_STATUS_RET(GetOriginalType(out_node, type), "[Get][OriginalType] failed.");
  101. if ((type != MERGE) && (type != REFMERGE)) {
  102. continue;
  103. }
  104. NodePtr next_node = nullptr;
  105. if (FindTargetNode(out_node, NEXTITERATION, true, next_node) != SUCCESS) {
  106. GELOGE(INTERNAL_ERROR, "[Get][NextIterationNode] failed, frame_name:%s", frame_name.c_str());
  107. return INTERNAL_ERROR;
  108. }
  109. loop_group_iter.second->merge_next_pairs.emplace_back(std::make_pair(out_node, next_node));
  110. NodePtr switch_node = nullptr;
  111. if (FindTargetNode(out_node, SWITCH, false, switch_node) != SUCCESS) {
  112. GELOGE(INTERNAL_ERROR, "[Get][SwitchNode] failed, frame_name:%s.", frame_name.c_str());
  113. return INTERNAL_ERROR;
  114. }
  115. if (switch_node == nullptr) {
  116. continue;
  117. }
  118. if (!AttrUtils::SetInt(switch_node->GetOpDesc(), ATTR_NAME_STREAM_SWITCH_TYPE, kLoopType)) {
  119. REPORT_CALL_ERROR("E19999", "Set Attr:%s to op:%s(%s) failed", ATTR_NAME_STREAM_SWITCH_TYPE.c_str(),
  120. switch_node->GetName().c_str(), switch_node->GetType().c_str());
  121. GELOGE(INTERNAL_ERROR, "[Set][Attr] %s to op:%s(%s) failed", ATTR_NAME_STREAM_SWITCH_TYPE.c_str(),
  122. switch_node->GetName().c_str(), switch_node->GetType().c_str());
  123. return INTERNAL_ERROR;
  124. }
  125. NodePtr loop_cond = nullptr;
  126. if (FindTargetNode(switch_node, LOOPCOND, true, loop_cond) != SUCCESS) {
  127. GELOGE(INTERNAL_ERROR, "[Get][LoopCondNode] failed, frame_name:%s.", frame_name.c_str());
  128. return INTERNAL_ERROR;
  129. }
  130. loop_group_iter.second->switch_nodes.emplace_back(switch_node);
  131. if (loop_group_iter.second->loop_cond == nullptr) {
  132. loop_group_iter.second->loop_cond = loop_cond;
  133. } else if (loop_group_iter.second->loop_cond != loop_cond) {
  134. REPORT_INNER_ERROR("E19999", "Multi LoopCond nodes exist, frame_name:%s, check invalid", frame_name.c_str());
  135. GELOGE(FAILED, "[Check][Param] Multi LoopCond nodes exist, frame_name:%s.", frame_name.c_str());
  136. return FAILED;
  137. }
  138. }
  139. }
  140. }
  141. return SUCCESS;
  142. }
  143. ///
  144. /// @brief Verify if valid
  145. /// @return bool
  146. ///
  147. bool NextIterationPass::VerifyWhileGroup() {
  148. // map<frame_name, LoopCondGroup>
  149. for (const auto &loop_group_iter : loop_group_map_) {
  150. const std::string &frame_name = loop_group_iter.first;
  151. if (frame_name.empty()) {
  152. REPORT_INNER_ERROR("E19999", "Verify while group failed, frame_name is empty");
  153. GELOGE(INTERNAL_ERROR, "[Check][Param] Verify while group failed, frame_name is empty.");
  154. return false;
  155. }
  156. if (loop_group_iter.second->loop_cond == nullptr) {
  157. REPORT_INNER_ERROR("E19999", "Verify while group failed, LoopCond is null, frame_name:%s.", frame_name.c_str());
  158. GELOGE(INTERNAL_ERROR, "[Check][Param] Verify while group failed, LoopCond is null, frame_name:%s.",
  159. frame_name.c_str());
  160. return false;
  161. }
  162. for (const auto &pair_iter : loop_group_iter.second->merge_next_pairs) {
  163. if ((pair_iter.first == nullptr) || (pair_iter.second == nullptr)) {
  164. REPORT_INNER_ERROR("E19999", "Verify while group failed, merge_node/next_node is null, frame_name:%s.",
  165. frame_name.c_str());
  166. GELOGE(INTERNAL_ERROR, "[Check][Param] Verify while group failed, merge_node/next_node is null, frame_name:%s.",
  167. frame_name.c_str());
  168. return false;
  169. }
  170. }
  171. }
  172. return true;
  173. }
  174. ///
  175. /// @brief Handle while group
  176. /// @param [in] graph
  177. /// @return Status
  178. ///
  179. Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) {
  180. for (const auto &loop_cond_iter : loop_group_map_) {
  181. const LoopCondGroup &loop_group = *loop_cond_iter.second;
  182. const std::string &cond_name = loop_cond_iter.second->loop_cond->GetName();
  183. const int64_t group_index = loop_group.loop_cond->GetOpDesc()->GetId();
  184. GELOGI("Handle while group, LoopCond node: %s.", cond_name.c_str());
  185. // Create Active node, Enter->Active->Merge, NextIteration->Active->Merge
  186. NodePtr enter_active = CreateActiveNode(graph, cond_name + "_Enter_" + STREAMACTIVE);
  187. NodePtr next_active = CreateActiveNode(graph, cond_name + "_Next_" + STREAMACTIVE);
  188. if ((enter_active == nullptr) || (next_active == nullptr)) {
  189. GELOGE(INTERNAL_ERROR, "[Create][ActiveNode] failed, cond_name:%s.", cond_name.c_str());
  190. return INTERNAL_ERROR;
  191. }
  192. for (const auto &enter_node : loop_cond_iter.second->enter_nodes) {
  193. // Enter --> Active
  194. if (GraphUtils::AddEdge(enter_node->GetOutControlAnchor(), enter_active->GetInControlAnchor()) != GRAPH_SUCCESS) {
  195. REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed",
  196. enter_node->GetName().c_str(), enter_node->GetType().c_str(),
  197. enter_active->GetName().c_str(), enter_active->GetType().c_str());
  198. GELOGE(INTERNAL_ERROR, "[Add][ControlEdge] between op:%s(%s) and op:%s(%s) failed",
  199. enter_node->GetName().c_str(), enter_node->GetType().c_str(),
  200. enter_active->GetName().c_str(), enter_active->GetType().c_str());
  201. return INTERNAL_ERROR;
  202. }
  203. SetControlFlowGroup(enter_node, group_index);
  204. }
  205. for (const auto &pair : loop_cond_iter.second->merge_next_pairs) {
  206. NodePtr merge_node = pair.first;
  207. NodePtr next_node = pair.second;
  208. // Active --> Merge
  209. if (GraphUtils::AddEdge(enter_active->GetOutControlAnchor(), merge_node->GetInControlAnchor()) != GRAPH_SUCCESS) {
  210. REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed",
  211. enter_active->GetName().c_str(), enter_active->GetType().c_str(),
  212. merge_node->GetName().c_str(), merge_node->GetType().c_str());
  213. GELOGE(INTERNAL_ERROR, "[Add][ControlEdge] between op:%s(%s) and op:%s(%s) failed",
  214. enter_active->GetName().c_str(), enter_active->GetType().c_str(),
  215. merge_node->GetName().c_str(), merge_node->GetType().c_str());
  216. return INTERNAL_ERROR;
  217. }
  218. // NextIteration --> Active
  219. if (GraphUtils::AddEdge(next_node->GetOutControlAnchor(), next_active->GetInControlAnchor()) != GRAPH_SUCCESS) {
  220. REPORT_CALL_ERROR("E19999", "Add control edge between op:%s(%s) and op:%s(%s) failed",
  221. next_node->GetName().c_str(), next_node->GetType().c_str(),
  222. next_active->GetName().c_str(), next_active->GetType().c_str());
  223. GELOGE(INTERNAL_ERROR, "[Add][ControlEdge] between op:%s(%s) and op:%s(%s) failed",
  224. next_node->GetName().c_str(), next_node->GetType().c_str(),
  225. next_active->GetName().c_str(), next_active->GetType().c_str());
  226. return INTERNAL_ERROR;
  227. }
  228. // break link between NextIteration and Merge
  229. if (BreakNextIteration(next_node, merge_node) != SUCCESS) {
  230. GELOGE(INTERNAL_ERROR, "[Break][NextIteration] failed, next_node:%s, merge_node:%s",
  231. next_node->GetName().c_str(), merge_node->GetName().c_str());
  232. return INTERNAL_ERROR;
  233. }
  234. SetControlFlowGroup(next_node, group_index);
  235. SetControlFlowGroup(merge_node, group_index);
  236. }
  237. if ((SetActiveLabelList(enter_active, {cond_name}) != SUCCESS) ||
  238. (SetActiveLabelList(next_active, {cond_name}) != SUCCESS)) {
  239. GELOGE(INTERNAL_ERROR, "[Set][ActiveLabelList] failed, cond_name:%s.", cond_name.c_str());
  240. return INTERNAL_ERROR;
  241. }
  242. SetControlFlowGroup(loop_group.loop_cond, group_index);
  243. SetControlFlowGroup(enter_active, group_index);
  244. SetControlFlowGroup(next_active, group_index);
  245. HandleSwitchExitNodes(loop_group, group_index);
  246. }
  247. return SUCCESS;
  248. }
  249. ///
  250. /// @brief Mark force unknown for Exit node
  251. /// @param [in] group of LoopCond
  252. /// @param [in] index of LoopCond Node
  253. /// @return void
  254. ///
  255. void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, int64_t group_index) {
  256. for (const auto &switch_node : loop_group.switch_nodes) {
  257. SetControlFlowGroup(switch_node, group_index);
  258. for (const auto &node : switch_node->GetOutDataNodes()) {
  259. std::string node_type;
  260. (void)GetOriginalType(node, node_type);
  261. if (kExitOpTypes.count(node_type) > 0) {
  262. SetControlFlowGroup(node, group_index);
  263. }
  264. }
  265. }
  266. }
  267. ///
  268. /// @brief Create Active Node
  269. /// @param [in] graph
  270. /// @param [in] name
  271. /// @return ge::NodePtr
  272. ///
  273. NodePtr NextIterationPass::CreateActiveNode(ComputeGraphPtr &graph, const std::string &name) {
  274. OpDescPtr op_desc = MakeShared<OpDesc>(name, STREAMACTIVE);
  275. if (op_desc == nullptr) {
  276. REPORT_CALL_ERROR("E19999", "New OpDesc failed");
  277. GELOGE(FAILED, "[New][OpDesc] failed");
  278. return nullptr;
  279. }
  280. GELOGI("Create StreamActive op:%s.", op_desc->GetName().c_str());
  281. NodePtr active_node = graph->AddNode(op_desc);
  282. if (active_node == nullptr) {
  283. REPORT_CALL_ERROR("E19999", "Add node:%s(%s) to graph:%s failed",
  284. op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str());
  285. GELOGE(INTERNAL_ERROR, "[Add][Node] %s(%s) to graph:%s failed",
  286. op_desc->GetName().c_str(), op_desc->GetType().c_str(), graph->GetName().c_str());
  287. return nullptr;
  288. }
  289. if (SetSwitchBranchNodeLabel(active_node, name) != SUCCESS) {
  290. REPORT_CALL_ERROR("E19999", "Set switch branch node label:%s to node:%s(%s) failed",
  291. name.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
  292. GELOGE(INTERNAL_ERROR, "[Set][SwitchBranchNodeLabel] %s to node:%s(%s) failed",
  293. name.c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
  294. return nullptr;
  295. }
  296. return active_node;
  297. }
  298. ///
  299. /// @brief Break NextIteration Link & add name to merge attr
  300. /// @param [in] next_node
  301. /// @param [in] merge_node
  302. /// @return Status
  303. ///
  304. Status NextIterationPass::BreakNextIteration(const NodePtr &next_node, NodePtr &merge_node) {
  305. if ((merge_node == nullptr) || (next_node == nullptr)) {
  306. GELOGE(PARAM_INVALID, "[Check][Param] merge node or next node is nullptr.");
  307. return PARAM_INVALID;
  308. }
  309. for (const auto &in_anchor : merge_node->GetAllInDataAnchors()) {
  310. OutDataAnchorPtr out_anchor = in_anchor->GetPeerOutAnchor();
  311. if ((out_anchor == nullptr) || (out_anchor->GetOwnerNode() != next_node)) {
  312. continue;
  313. }
  314. if (GraphUtils::RemoveEdge(out_anchor, in_anchor) != SUCCESS) {
  315. REPORT_CALL_ERROR("E19999", "Remove edge between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed",
  316. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  317. out_anchor->GetIdx(),
  318. merge_node->GetName().c_str(), merge_node->GetType().c_str(), in_anchor->GetIdx());
  319. GELOGE(INTERNAL_ERROR, "[Remove][Edge] between op:%s(%s)(index:%d) and op:%s(%s)(index:%d) failed",
  320. out_anchor->GetOwnerNode()->GetName().c_str(), out_anchor->GetOwnerNode()->GetType().c_str(),
  321. out_anchor->GetIdx(), merge_node->GetName().c_str(), merge_node->GetType().c_str(), in_anchor->GetIdx());
  322. return INTERNAL_ERROR;
  323. }
  324. if (SetNextIteration(merge_node, next_node) != SUCCESS) {
  325. REPORT_CALL_ERROR("E19999", "Set attr NEXT_ITERATION value:%s to node:%s(%s) failed",
  326. next_node->GetName().c_str(), merge_node->GetName().c_str(), merge_node->GetType().c_str());
  327. GELOGE(INTERNAL_ERROR, "[Set][Attr] NEXT_ITERATION value:%s to node:%s(%s) failed",
  328. next_node->GetName().c_str(), merge_node->GetName().c_str(), merge_node->GetType().c_str());
  329. return INTERNAL_ERROR;
  330. }
  331. }
  332. return SUCCESS;
  333. }
  334. ///
  335. /// @brief find target node
  336. /// @param [in] node
  337. /// @param [in] target_type
  338. /// @param [in] is_input
  339. /// @param [out] target_node
  340. /// @return Status
  341. ///
  342. Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string &target_type, bool is_input,
  343. NodePtr &target_node) {
  344. if (node == nullptr) {
  345. REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid");
  346. GELOGE(PARAM_INVALID, "[Check][Param] node is nullptr.");
  347. return PARAM_INVALID;
  348. }
  349. std::vector<NodePtr> nodes;
  350. if (is_input) {
  351. for (const auto &tmp_node : node->GetInDataNodes()) {
  352. nodes.emplace_back(tmp_node);
  353. }
  354. } else {
  355. for (const auto &tmp_node : node->GetOutDataNodes()) {
  356. nodes.emplace_back(tmp_node);
  357. }
  358. }
  359. for (const auto &tmp_node : nodes) {
  360. std::string type;
  361. GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "[Get][NodeType] failed.");
  362. if ((target_type == LOOPCOND) && (type == target_type)) {
  363. target_node = tmp_node;
  364. break;
  365. } else if ((type == target_type) || (type == "Ref" + target_type)) {
  366. target_node = tmp_node;
  367. break;
  368. }
  369. }
  370. if ((target_type != SWITCH) && (target_node == nullptr)) {
  371. REPORT_INNER_ERROR("E19999", "Find target_type:%s node around node:%s(%s) failed",
  372. target_type.c_str(), node->GetName().c_str(), node->GetType().c_str());
  373. GELOGE(INTERNAL_ERROR, "[Check][Param] Find target_type:%s node around node:%s(%s) failed",
  374. target_type.c_str(), node->GetName().c_str(), node->GetType().c_str());
  375. return INTERNAL_ERROR;
  376. }
  377. return SUCCESS;
  378. }
  379. ///
  380. /// @brief Clear Status, used for subgraph pass
  381. /// @return SUCCESS
  382. ///
  383. Status NextIterationPass::ClearStatus() {
  384. loop_group_map_.clear();
  385. return SUCCESS;
  386. }
  387. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示