You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graph_partition.h 8.8 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_PARTITION_GRAPH_PARTITION_H_
  17. #define GE_GRAPH_PARTITION_GRAPH_PARTITION_H_
  18. #include <list>
  19. #include <map>
  20. #include <memory>
  21. #include <set>
  22. #include <string>
  23. #include <unordered_map>
  24. #include <unordered_set>
  25. #include <utility>
  26. #include <vector>
  27. #include "graph/compute_graph.h"
  28. #include "graph/manager/graph_manager_utils.h"
  29. #include "graph/operator_reg.h"
  30. #include "graph/partition/engine_place.h"
  31. namespace ge {
// Maps a sub-graph to a string. NOTE(review): the string is presumably the engine name the
// sub-graph was created for (see AddNewGraphToPartition) - confirm against the implementation.
using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>;
// Node-to-node lookup table.
using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>;
// String-keyed lookup of compute graphs; per the alias name the key is an engine name.
using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>;
// Ordered set of anchor pairs (despite the "Map" suffix this is a std::set, not a map).
using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>;
// Set of cluster indices.
using ClusterSet = std::unordered_set<size_t>;
  37. class Cluster {
  38. public:
  39. size_t index_; // corresponding to rank of node
  40. ClusterSet in_clu_; // inClusters index
  41. ClusterSet out_clu_; // outClusters index
  42. std::list<NodePtr> nodes_; // including node of this cluster
  43. std::string engine_name_; // data like must be a specific engine
  44. std::string stream_label_;
  45. explicit Cluster(size_t index, std::string engine, std::string stream)
  46. : index_(index), engine_name_(std::move(engine)), stream_label_(std::move(stream)) {}
  47. ~Cluster() = default;
  48. };
  49. using ClusterPtr = std::shared_ptr<Cluster>;
  50. class GraphPartitioner {
  51. public:
  52. /// Partition() can only be called in Partition mode.
  53. /// MergeAfterSubGraphOptimization() can only be called in Merge mode.
  54. /// After Partition(), change to Merge mode. After MergeAfterSubGraphOptimization(), change to Partition mode
  55. enum Mode { kPartitioning, kSecondPartitioning, kMerging };
  56. GraphPartitioner() : partition_times_(0){};
  57. ~GraphPartitioner() = default;
  58. // the main method that partitions the graph
  59. // input_size and output_size are the number of inputs and outputs in the original graph
  60. Status Partition(ComputeGraphPtr compute_graph, Mode mode);
  61. // after partition, all SubGraph will be merged back based on end<->pld.
  62. Status MergeAfterSubGraphOptimization(ComputeGraphPtr &output_merged_compute_graph,
  63. const ComputeGraphPtr &original_compute_graph);
  64. // Return all subgraphs
  65. const Graph2SubGraphInfoList &GetSubGraphMap();
  66. private:
  67. Status MergeSubGraph(ge::ComputeGraphPtr &output_merged_compute_graph,
  68. const ge::ComputeGraphPtr &original_compute_graph);
  69. Status PartitionSubGraph(ge::ComputeGraphPtr compute_graph, Mode mode);
  70. Status MergeAllSubGraph(ComputeGraphPtr &output_merged_compute_graph,
  71. const std::vector<SubGraphInfoPtr> &sub_graph_list);
  72. Status CheckIfEnd2PldEmpty(ComputeGraphPtr &output_merged_compute_graph);
  73. // Run engine placer, assign engine, check support amd init all clusters
  74. Status Initialize(ComputeGraphPtr compute_graph);
  75. /// add pld and end nodes between two sub-graphs for the specific anchors
  76. /// all anchors are in original graph
  77. Status AddPlaceHolderEnd(const AnchorPtr &out_anchor, const AnchorPtr &in_anchor);
  78. void AddNewGraphToPartition(ComputeGraphPtr &input_graph, const std::string &engine_name);
  79. Status AddPartitionsToGraphNode(vector<SubGraphInfoPtr> &output_subgraphs, ComputeGraphPtr compute_graph);
  80. // check if the node has no input
  81. bool HasNoInput(NodePtr node);
  82. // check if the node is data-like. Currently data-like means: data, variable, const
  83. bool IsDataLike(NodePtr node);
  84. // add place holder and end node in src and dst graph
  85. graphStatus AddPlaceHolderEndInSrcDstGraph(const AnchorPtr &out_data_anchor, const AnchorPtr &peer_in_anchor,
  86. const ComputeGraphPtr &pld_graph, const ComputeGraphPtr &end_graph);
  87. Status LinkInput2EndRemoveOrginalLink(NodePtr input_node, ComputeGraphPtr src_graph, ComputeGraphPtr dst_graph);
  88. /// After partition, put input nodes in srcGraph to dstGraph. Data will be linked to 'end';
  89. /// the other end will be linked to 'placeholder'
  90. Status PutInputNodesInSubGraph(const ComputeGraphPtr &src_graph, const ComputeGraphPtr &dst_graph);
  91. // Sort all subGraphs topologically, store the info in sorted_partitions_ <computeGraph, rank>
  92. Status SortSubGraphs(const ComputeGraphPtr &);
  93. AnchorPtr GetEndInAnchor(const AnchorPtr &src_anchor, const NodePtr &end_node);
  94. AnchorPtr GetPldOutAnchor(const NodePtr &pld_node, const AnchorPtr &dst_anchor);
  95. Status RemoveNodeAndEdgeBetweenEndPld(ComputeGraphPtr &output_merged_compute_graph,
  96. const std::vector<SubGraphInfoPtr> &sub_graph_list);
  97. void AddEndPldInformationToSubGraphInfo(SubGraphInfoPtr &sub_graph_info);
  98. bool IsMergeable(size_t parent_cluster, size_t child_cluster, size_t upper_bound);
  99. // Link from->to
  100. void InsertEdge(size_t from, size_t to);
  101. // Remove parent cluster's out and child cluster's in
  102. void RemoveEdge(size_t parent_cluster, size_t child_cluster);
  103. void MergeTwoClusters(size_t parent_cluster, size_t &child_cluster);
  104. // Check if there's a second path between two clusters. The max path length is upper_bound
  105. bool HasSecondPath(size_t src, size_t dst, size_t upper_bound);
  106. // Mark all clusters
  107. void MarkClusters();
  108. /// Split all sub graph and add placeholder, end according to marks
  109. /// traverse marked clusters and split them into sub-graphs
  110. Status SplitSubGraphs(ComputeGraphPtr compute_graph);
  111. Status UpdateEndOpDesc(const NodePtr &src_node, int output_index, OpDescPtr &end_op_desc);
  112. Status UpdatePldOpDesc(const NodePtr &dst_node, int input_index, OpDescPtr &end_op_desc);
  113. // Clear partition data
  114. void ClearAllPartitionData();
  115. void SetMergedGraphId(ComputeGraphPtr &output_merged_compute_graph);
  116. struct GraphPartitionInfo {
  117. EnginePlacer engine_placer_;
  118. PartitionMap partitions_; // sub-graphs after partition <sub-graph-id, ComputeGraphPtr>
  119. std::unordered_map<ComputeGraphPtr, size_t> partitions_2_rank_; // <subGraph, rank>
  120. std::vector<ComputeGraphPtr> rank_2_partitions_; // <rank, subGraph>
  121. NodetoNodeMap corresponding_node_in_partitions_; // mapping between a node in the original graph and
  122. uint32_t num_of_pld_end_; // a counter to track 'place holder' and 'end'
  123. size_t input_size_;
  124. size_t output_size_;
  125. std::string output_name_;
  126. NodetoNodeMap end_2_pld_; // mapping between each 'end; and 'placeHolder' node
  127. NodetoNodeMap pld_2_end_; // mapping between each 'placeHolder' and 'end' node
  128. std::map<size_t, NodePtr> index_2_end_; // order mapping between peerindex and 'end' node
  129. Mode mode_;
  130. std::unordered_map<size_t, ClusterPtr> clusters_; // index to cluster ptr, contains all nodes
  131. std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_; // node map to cluster
  132. std::unordered_map<std::shared_ptr<Cluster>, ComputeGraphPtr> cluster_2_partition_; // cluster map to subgraph
  133. void ClearAllData(Mode mode) {
  134. rank_2_partitions_.clear();
  135. partitions_2_rank_.clear();
  136. partitions_.clear();
  137. corresponding_node_in_partitions_.clear();
  138. index_2_end_.clear();
  139. cluster_2_partition_.clear();
  140. clusters_.clear();
  141. node_2_cluster_.clear();
  142. pld_2_end_.clear();
  143. end_2_pld_.clear();
  144. if (mode_ == kMerging) {
  145. mode_ = kPartitioning;
  146. } else {
  147. mode_ = mode;
  148. }
  149. }
  150. GraphPartitionInfo() : num_of_pld_end_(0), input_size_(0), output_size_(0), mode_(kPartitioning) {}
  151. ~GraphPartitionInfo() = default;
  152. };
  153. std::unordered_map<ComputeGraphPtr, GraphPartitionInfo> graph_2_graph_partition_info_;
  154. Graph2SubGraphInfoList graph_2_subgraph_list_;
  155. Graph2InputNodesSubGraphInfo graph_2_input_subgraph_;
  156. GraphPartitionInfo graph_info_;
  157. uint32_t partition_times_; // times of call partition
  158. std::map<Mode, std::string> mode_2_str_ = {{kPartitioning, "Partitioning"},
  159. {kSecondPartitioning, "SecondPartitioning"}, {kMerging, "Merging"}};
  160. friend class GraphManager;
  161. };
  162. } // namespace ge
  163. #endif // GE_GRAPH_PARTITION_GRAPH_PARTITION_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示: