You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hccl_tailing_optimization_pass.cc 2.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/passes/hccl_tailing_optimization_pass.h"
  17. #include "common/transop_util.h"
  18. namespace ge {
  19. Status HcclTailingOptimizationPass::Run(ComputeGraphPtr graph) {
  20. for (const auto &node : graph->GetDirectNode()) {
  21. GE_CHECK_NOTNULL(node);
  22. if (node->GetType() != HCOMALLREDUCE) {
  23. continue;
  24. }
  25. for (auto &out_node : node->GetOutDataNodes()) {
  26. if (!TransOpUtil::IsTransOp(out_node)) {
  27. continue;
  28. }
  29. GE_CHK_STATUS_RET_NOLOG(CopyControlEdgesForTransOp(out_node));
  30. }
  31. }
  32. return SUCCESS;
  33. }
  34. Status HcclTailingOptimizationPass::CopyControlEdgesForTransOp(NodePtr &first_trans_op) {
  35. auto dst_in_ctrl_anchor = first_trans_op->GetInControlAnchor();
  36. GE_CHECK_NOTNULL(dst_in_ctrl_anchor);
  37. std::set<OutControlAnchorPtr> src_out_ctrl_anchors;
  38. std::vector<NodePtr> trans_op_nodes{first_trans_op};
  39. while (!trans_op_nodes.empty()) {
  40. auto trans_op_node = trans_op_nodes.back();
  41. trans_op_nodes.pop_back();
  42. for (auto &next_node : trans_op_node->GetOutDataNodes()) {
  43. auto in_ctrl_anchor = next_node->GetInControlAnchor();
  44. GE_CHECK_NOTNULL(in_ctrl_anchor);
  45. auto peer_out_ctrl_anchors = in_ctrl_anchor->GetPeerOutControlAnchors();
  46. for (auto src_ctrl_anchor : peer_out_ctrl_anchors) {
  47. GE_CHECK_NOTNULL(src_ctrl_anchor->GetOwnerNode());
  48. src_out_ctrl_anchors.emplace(src_ctrl_anchor);
  49. }
  50. if (TransOpUtil::IsTransOp(next_node)) {
  51. trans_op_nodes.emplace_back(next_node);
  52. }
  53. }
  54. }
  55. for (auto &src_out_ctrl_anchor : src_out_ctrl_anchors) {
  56. if (!src_out_ctrl_anchor->IsLinkedWith(dst_in_ctrl_anchor)) {
  57. GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(src_out_ctrl_anchor, dst_in_ctrl_anchor),
  58. "[Add][Edge] between %s->%s failed",
  59. src_out_ctrl_anchor->GetOwnerNode()->GetName().c_str(),
  60. first_trans_op->GetName().c_str());
  61. }
  62. }
  63. return SUCCESS;
  64. }
  65. } // namespace ge

图引擎模块（GE）是 MindSpore 的一个子模块，其代码由 C++ 实现，位于前端模块 ME 和底层硬件之间，起到承接作用。图引擎模块以 ME 下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE 针对昇腾 AI 处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾 AI 处理器的强大算力。在进行模型训练/推理时，GE 会被自动调用，而用户对此并无感知。GE 主要由 GE API 和 GE Core 两部分组成，详细的架构图如下所示。