You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

logical_stream_allocator.h 7.9 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_BUILD_LOGICAL_STREAM_ALLOCATOR_H_
  17. #define GE_GRAPH_BUILD_LOGICAL_STREAM_ALLOCATOR_H_
  18. #include <map>
  19. #include <memory>
  20. #include <set>
  21. #include <string>
  22. #include <utility>
  23. #include <vector>
  24. #include "engine_manager/dnnengine_manager.h"
  25. #include "graph/manager/graph_manager_utils.h"
  26. namespace ge {
  // Declares the default special member functions shared by the stream passes:
  //   - a default constructor that forwards the stringized class name (#CLASS)
  //     to the LogicalStreamPass constructor,
  //   - a defaulted, overriding destructor,
  //   - deleted copy construction and copy assignment.
  // The expansion intentionally ends without ';' so the use site supplies it.
  #define STREAM_PASS_DEFAULT_FUNC(CLASS)      \
    CLASS(const CLASS &) = delete;             \
    CLASS &operator=(const CLASS &) = delete;  \
    CLASS() : LogicalStreamPass(#CLASS) {}     \
    ~CLASS() override = default
  // Sentinel stream id. In this header it is the initial value of
  // LogicalStreamPass::Subgraph::stream_id and LogicalStreamPass::Context::default_stream.
  constexpr int64_t kInvalidStream = -1;
  34. // Base stream class.
  35. class LogicalStreamPass {
  36. public:
  37. static const int64_t kDefaultMaxParalleNum = 1;
  38. struct Subgraph;
  39. using SubgraphPtr = std::shared_ptr<Subgraph>;
  40. struct Subgraph {
  41. string name;
  42. int64_t stream_id = kInvalidStream;
  43. const SubGraphInfo &subgraph_info;
  44. const EngineConf &engine_conf;
  45. int64_t max_parallel_num = kDefaultMaxParalleNum;
  46. SubgraphPtr reused_subgraph = nullptr;
  47. Subgraph(const SubGraphInfo &subgraph_info, const EngineConf &engine_conf)
  48. : subgraph_info(subgraph_info), engine_conf(engine_conf) {}
  49. };
  50. struct Context {
  51. int64_t default_stream = kInvalidStream;
  52. int64_t next_stream = 0;
  53. bool enable_single_stream = false;
  54. bool enable_hcom_parallel = false;
  55. };
  56. explicit LogicalStreamPass(const std::string &name);
  57. LogicalStreamPass(const LogicalStreamPass &) = delete;
  58. LogicalStreamPass &operator=(const LogicalStreamPass &) = delete;
  59. virtual ~LogicalStreamPass() = default;
  60. const std::string &GetName() const;
  61. virtual Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) = 0;
  62. protected:
  63. bool IsEngineSkip(const Subgraph &subgraph) const;
  64. bool IsEngineAttach(const Subgraph &subgraph) const;
  65. bool IsEngineIndependent(const Subgraph &subgraph) const;
  66. bool HasStreamLabel(const Subgraph &subgraph) const;
  67. bool HasAssignedStream(const Subgraph &subgraph) const;
  68. private:
  69. std::string name_;
  70. };
  71. using LogicalStreamPassPtr = std::shared_ptr<LogicalStreamPass>;
  72. // Allocate streams by label.
  73. class AssignByLabelPass : public LogicalStreamPass {
  74. public:
  75. STREAM_PASS_DEFAULT_FUNC(AssignByLabelPass);
  76. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  77. };
  78. // Engines such as hccl require independent Stream.
  79. class IndependentStreamPass : public LogicalStreamPass {
  80. public:
  81. STREAM_PASS_DEFAULT_FUNC(IndependentStreamPass);
  82. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  83. };
  84. // Reuse streams or assign new streams based on dependencies.
  85. class AssignByDependencyPass : public LogicalStreamPass {
  86. public:
  87. STREAM_PASS_DEFAULT_FUNC(AssignByDependencyPass);
  88. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  89. private:
  90. void InitEndSubgraphMap(const std::vector<SubgraphPtr> &subgraphs, std::map<NodePtr, SubgraphPtr> &end_subgraph_map);
  91. void InitPldSubgraphMap(const std::vector<SubgraphPtr> &subgraphs, std::map<NodePtr, SubgraphPtr> &pld_subgraph_map);
  92. SubgraphPtr GetReusableSubgraph(const SubgraphPtr &subgraph, const std::map<NodePtr, SubgraphPtr> &end_subgraph_map,
  93. const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map);
  94. int64_t AssignNewStream(SubgraphPtr subgraph);
  95. void UpdateAssignedSubgraphs(Context &context);
  96. void UpdateReusedSubgraphs();
  97. bool CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph,
  98. const std::map<NodePtr, SubgraphPtr> &pld_subgraph_map);
  99. // <engine name, next stream id>
  100. std::map<std::string, int64_t> engine_next_streams_;
  101. // <engine name, stream num>
  102. std::map<std::string, int64_t> engine_stream_num_;
  103. // Subgraphs of assign stream by engine
  104. std::vector<SubgraphPtr> assigned_subgraphs_;
  105. // <current subgraph, reused subgraph>
  106. std::vector<std::pair<SubgraphPtr, SubgraphPtr>> reused_subgraphs_;
  107. };
  108. // All nodes in the graph are assigned the same stream.
  109. class SingleStreamPass : public LogicalStreamPass {
  110. public:
  111. STREAM_PASS_DEFAULT_FUNC(SingleStreamPass);
  112. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  113. };
  114. // Update the stream of subgraphs to nodes.
  115. class NodeStreamUpdatePass : public LogicalStreamPass {
  116. public:
  117. STREAM_PASS_DEFAULT_FUNC(NodeStreamUpdatePass);
  118. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  119. };
  120. // Update the stream of subgraphs to nodes.
  121. class UpdateForSkippedEnginePass : public LogicalStreamPass {
  122. public:
  123. STREAM_PASS_DEFAULT_FUNC(UpdateForSkippedEnginePass);
  124. /// Optimize for case like:
  125. /// NodeA(stream1) -> Const(stream2) -> NodeB(stream1)
  126. /// To case:
  127. /// NodeA(stream1) -> Const(stream1) -> NodeB(stream1)
  128. /// Which could reduce event number (Const could be other type which belong to skipped engine subgraph)
  129. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  130. private:
  131. int64_t GetSingleInoutStream(const NodePtr &node) const;
  132. // Judge if all predecessors' streams of node are kInvalidStream
  133. bool AreAllPredStreamsInvalid(const NodePtr &node) const;
  134. };
  135. // AllReduce and backward operators execute in parallel.
  136. class AllReduceParallelPass : public LogicalStreamPass {
  137. public:
  138. STREAM_PASS_DEFAULT_FUNC(AllReduceParallelPass);
  139. Status Run(ComputeGraphPtr graph, const std::vector<SubgraphPtr> &subgraphs, Context &context) override;
  140. private:
  141. bool IsHcomNode(const std::string& node_type);
  142. };
  143. // Assign logical streams which is not limited by the number of tasks.
  144. class LogicalStreamAllocator {
  145. using Subgraph = LogicalStreamPass::Subgraph;
  146. using SubgraphPtr = LogicalStreamPass::SubgraphPtr;
  147. using Context = LogicalStreamPass::Context;
  148. public:
  149. LogicalStreamAllocator(const std::map<std::string, SchedulerConf> &scheduler_confs,
  150. const std::map<std::string, int> &max_parallel_num);
  151. LogicalStreamAllocator(const LogicalStreamAllocator &) = delete;
  152. LogicalStreamAllocator &operator=(const LogicalStreamAllocator &) = delete;
  153. ~LogicalStreamAllocator() = default;
  154. void EnableSingleStream(bool enable);
  155. void EnableHcomParallel(bool hcom_parallel);
  156. Status Assign(const ComputeGraphPtr &root_graph, const Graph2SubGraphInfoList &subgraph_map, int64_t &stream_num);
  157. private:
  158. Status DoAssign(const ComputeGraphPtr &graph, const Graph2SubGraphInfoList &subgraph_map,
  159. const map<string, EngineConfPtr> &engine_confs);
  160. Status ConvertSubgraphs(const std::vector<SubGraphInfoPtr> &subgraph_infos,
  161. const std::map<std::string, EngineConfPtr> &engine_confs,
  162. std::vector<SubgraphPtr> &subgraphs);
  163. Status RunPasses(const ComputeGraphPtr &graph, const std::vector<SubgraphPtr> &subgraphs);
  164. void RefreshContinuousStreams(const ComputeGraphPtr &graph);
  165. const std::map<std::string, SchedulerConf> &scheduler_confs_;
  166. const std::map<std::string, int> &max_parallel_num_;
  167. Context context_;
  168. };
  169. } // namespace ge
  170. #endif // GE_GRAPH_BUILD_LOGICAL_STREAM_ALLOCATOR_H_

图引擎模块(GE)是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：