You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

stream_allocator.h 5.4 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef GE_GRAPH_BUILD_STREAM_ALLOCATOR_H_
  17. #define GE_GRAPH_BUILD_STREAM_ALLOCATOR_H_
  18. #include <map>
  19. #include <set>
  20. #include <string>
  21. #include <utility>
  22. #include <vector>
  23. #include "engine_manager/dnnengine_manager.h"
  24. #include "framework/common/ge_inner_error_codes.h"
  25. #include "graph/compute_graph.h"
  26. #include "graph/manager/graph_manager_utils.h"
  27. namespace ge {
  28. class StreamAllocator {
  29. public:
  30. StreamAllocator(ComputeGraphPtr whole_graph, const Graph2SubGraphInfoList &subgraphs);
  31. StreamAllocator(const StreamAllocator &) = delete;
  32. StreamAllocator &operator=(const StreamAllocator &) = delete;
  33. ~StreamAllocator() = default;
  34. Status AssignLogicalStreams(const std::map<std::string, int> &max_parallel_num, bool hcom_parallel);
  35. Status RefreshRealStream(int64_t &stream_num, int64_t &event_num);
  36. const vector<int64_t> &GetHugeStreams() const { return huge_streams_; }
  37. private:
  38. Status AssignSingleStream();
  39. Status SetActiveStreamsByLabel();
  40. Status SetActiveStreamsForSubgraphs();
  41. Status InsertSyncEvents();
  42. Status InsertOneEventInTwoNodes(const NodePtr &cur_node_ptr, const NodePtr &next_node_ptr);
  43. Status InsertEventsForSubgraph();
  44. Status OptimizeSyncEvents();
  45. Status OptimizeBySendEvents(const std::map<int64_t, std::vector<NodePtr>> &stream_nodes);
  46. Status OptimizeByRecvEvents(const std::map<int64_t, std::vector<NodePtr>> &stream_nodes);
  47. Status OptimizeByStreamActivate();
  48. // Determine if the successor node of RecvNode is directly or indirectly activated by the SendNode precursor node
  49. bool IsRecvNodeActivatedBySendNode(const NodePtr &send_node_ptr, const NodePtr &recv_node_ptr) const;
  50. bool IsActiveAfterNextIteration(const NodePtr &active_node_ptr) const;
  51. Status SplitStreams(std::vector<std::set<int64_t>> &split_streams);
  52. bool NeedSpiltNewStream(int64_t stream_node_num, int64_t max_node_num_one_stream, const OpDescPtr &op_desc) const;
  53. Status UpdateActiveStreams(const std::vector<std::set<int64_t>> &split_streams);
  54. void UpdateLabelStreams(const std::vector<std::set<int64_t>> &split_streams);
  55. Status UpdateActiveStreamsForSwitchNode(NodePtr &switch_node);
  56. Status InsertActiveNodesAfterSwitch(NodePtr &switch_nodes, std::vector<NodePtr> &switch_active_nodes);
  57. Status UpdateActiveStreamsForActiveNode(const std::vector<std::set<int64_t>> &split_streams, NodePtr &node);
  58. Status UpdateActiveStreamsForSubgraphs() const;
  59. bool IsActivated(int64_t stream_id) const;
  60. Status SetActiveStreamsForLoop();
  61. Status CheckStreamActived() const;
  62. Status RefreshContinuousEvents();
  63. Status InsertSyncEventNodes();
  64. Status ReorderEventNodes() const;
  65. void DumpEvents();
  66. Status GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stream_count, uint32_t &max_task_count);
  67. int64_t GetMaxNodeNumPerStream(const NodePtr &node, uint32_t max_node_num_one_stream);
  68. void AddNodeNum(const NodePtr &node, int64_t &node_num);
  69. void AddSendEventId(const NodePtr &node, uint32_t event_id);
  70. void AddRecvEventId(const NodePtr &node, uint32_t event_id);
  71. void RmvSendEventId(const NodePtr &node, uint32_t event_id);
  72. void RmvRecvEventId(const NodePtr &node, uint32_t event_id);
  73. void GetSendEventIdList(const NodePtr &node, std::vector<uint32_t> &send_list) const;
  74. void GetRecvEventIdList(const NodePtr &node, std::vector<uint32_t> &recv_list) const;
  75. NodePtr GetNodeFromSendEventId(uint32_t send_event_id) const;
  76. NodePtr GetNodeFromRecvEventId(uint32_t recv_event_id) const;
  77. Status AddEventId(const NodePtr &pre_node, const NodePtr &not_cur, const NodePtr &cur_node, bool not_use_cur);
  78. Status AddActiveNodes(NodePtr &switch_node, const std::vector<std::string> &ori_active_label_list,
  79. std::vector<std::string> &active_label_list, std::vector<NodePtr> &added_active_nodes);
  80. Status SetActiveStreamList(NodePtr &active_node, const std::string &active_label);
  81. ComputeGraphPtr whole_graph_;
  82. const Graph2SubGraphInfoList &subgraphs_;
  83. int64_t stream_num_{0};
  84. uint32_t event_num_{0};
  85. bool enable_single_stream_{false};
  86. vector<int64_t> huge_streams_;
  87. // <stream label, set<stream id>>
  88. std::map<string, std::set<int64_t>> labeled_streams_;
  89. std::map<std::string, std::set<NodePtr>> specific_activated_labels_;
  90. std::set<int64_t> specific_activated_streams_;
  91. std::map<int64_t, std::set<NodePtr>> specific_activated_streams_nodes_map_;
  92. std::map<NodePtr, int64_t> node_split_stream_map_;
  93. std::map<ComputeGraphPtr, NodePtr> subgraph_first_active_node_map_;
  94. // send events corresponding to the node
  95. std::map<NodePtr, std::vector<uint32_t>> node_to_send_events_;
  96. // recv events corresponding to the node
  97. std::map<NodePtr, std::vector<uint32_t>> node_to_recv_events_;
  98. };
  99. } // namespace ge
  100. #endif // GE_GRAPH_BUILD_STREAM_ALLOCATOR_H_

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。