You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

logical_stream_allocator.cc 28 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/logical_stream_allocator.h"
  17. #include <queue>
  18. #include "common/ge/ge_util.h"
  19. #include "framework/common/debug/ge_log.h"
  20. #include "framework/common/fmk_error_codes.h"
  21. #include "framework/common/types.h"
  22. #include "graph/debug/ge_attr_define.h"
  23. #include "graph/utils/graph_utils.h"
  24. using std::map;
  25. using std::queue;
  26. using std::set;
  27. using std::string;
  28. using std::vector;
  namespace {
  // Engine id that IsHeadNodeExceeded() compares against Subgraph::engine_conf.id.
  const char *const kAICPUEngineName = "DNN_VM_AICPU";
  // Attribute key read from a placeholder's OpDesc in HasNonConstInputNode().
  const char *const kAttrNameParentOpType = "parentOpType";
  // Threshold used by IsHeadNodeExceeded(); calculated by 1024 * 0.8.
  constexpr size_t kHeadNodeMaxNum = 820;
  }  // namespace
  34. namespace ge {
  35. LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}
  36. const string &LogicalStreamPass::GetName() const { return name_; }
  37. bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; }
  38. bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; }
  39. bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; }
  40. bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const {
  41. return !subgraph.subgraph_info.GetStreamLabel().empty();
  42. }
  43. bool LogicalStreamPass::HasAssignedStream(const Subgraph &subgraph) const {
  44. return subgraph.stream_id != kInvalidStream;
  45. }
  46. bool LogicalStreamPass::HasNonConstInputNode(const Subgraph &subgraph) const {
  47. const SubGraphInfo &subgraph_info = subgraph.subgraph_info;
  48. const auto &pld_to_end_map = subgraph_info.GetPld2EndMap();
  49. for (const auto &pld_to_end : pld_to_end_map) {
  50. const NodePtr &placeholder = pld_to_end.first;
  51. if (placeholder != nullptr) {
  52. string parent_op_type;
  53. if (AttrUtils::GetStr(placeholder->GetOpDesc(), kAttrNameParentOpType, parent_op_type)) {
  54. if ((parent_op_type != CONSTANT) && (parent_op_type != CONSTANTOP)) {
  55. return true;
  56. }
  57. }
  58. }
  59. }
  60. return false;
  61. }
  62. Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  63. bool changed = false;
  64. int64_t &next_stream = context.next_stream;
  65. map<string, int64_t> label_streams;
  66. for (const SubgraphPtr &subgraph : subgraphs) {
  67. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  68. if (!stream_label.empty()) {
  69. // Subgraphs of the same stream_label are assigned to the same stream,
  70. // and different stream_labels are assigned new streams.
  71. auto iter = label_streams.find(stream_label);
  72. if (iter != label_streams.end()) {
  73. subgraph->stream_id = iter->second;
  74. } else {
  75. subgraph->stream_id = next_stream;
  76. GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str());
  77. label_streams.emplace(stream_label, next_stream);
  78. ++next_stream;
  79. }
  80. changed = true;
  81. }
  82. }
  83. return changed ? SUCCESS : NOT_CHANGED;
  84. }
  85. Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  86. bool changed = false;
  87. int64_t &next_stream = context.next_stream;
  88. // <engine, <label, stream>>
  89. map<string, map<string, int64_t>> engine_streams;
  90. for (const SubgraphPtr &subgraph : subgraphs) {
  91. if (!IsEngineIndependent(*subgraph)) {
  92. continue;
  93. }
  94. const string &engine = subgraph->engine_conf.id;
  95. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  96. auto &label_streams = engine_streams[engine];
  97. auto iter = label_streams.find(stream_label);
  98. if (iter != label_streams.end()) {
  99. subgraph->stream_id = iter->second;
  100. } else {
  101. subgraph->stream_id = next_stream;
  102. GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(),
  103. stream_label.c_str());
  104. label_streams.emplace(stream_label, next_stream);
  105. ++next_stream;
  106. }
  107. changed = true;
  108. }
  109. return changed ? SUCCESS : NOT_CHANGED;
  110. }
  111. Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  112. bool changed = false;
  113. if (IsHeadNodeExceeded(subgraphs)) {
  114. int64_t &next_stream = context.next_stream;
  115. for (const SubgraphPtr &subgraph : subgraphs) {
  116. if (!HasAssignedStream(*subgraph)) {
  117. subgraph->stream_id = next_stream;
  118. changed = true;
  119. }
  120. }
  121. if (changed) {
  122. ++next_stream;
  123. return SUCCESS;
  124. }
  125. return NOT_CHANGED;
  126. }
  127. map<NodePtr, SubgraphPtr> end_subgraph_map;
  128. map<NodePtr, SubgraphPtr> pld_subgraph_map;
  129. InitEndSubgraphMap(subgraphs, end_subgraph_map);
  130. InitPldSubgraphMap(subgraphs, pld_subgraph_map);
  131. for (const SubgraphPtr &subgraph : subgraphs) {
  132. if (HasAssignedStream(*subgraph)) {
  133. continue;
  134. }
  135. SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map);
  136. if (reusable_subgraph != nullptr) {
  137. if (HasAssignedStream(*reusable_subgraph)) {
  138. subgraph->stream_id = reusable_subgraph->stream_id;
  139. } else {
  140. int64_t stream_id = AssignNewStream(reusable_subgraph);
  141. subgraph->stream_id = stream_id;
  142. GELOGI("Reusable subgraph %s has not been assigned a stream, now assign new stream %ld.",
  143. reusable_subgraph->name.c_str(), stream_id);
  144. }
  145. if (reusable_subgraph->reused_subgraph != nullptr) {
  146. reusable_subgraph = reusable_subgraph->reused_subgraph;
  147. }
  148. subgraph->reused_subgraph = reusable_subgraph;
  149. reused_subgraphs_.emplace_back(subgraph, reusable_subgraph);
  150. GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(),
  151. subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(),
  152. reusable_subgraph->engine_conf.id.c_str());
  153. } else {
  154. (void)AssignNewStream(subgraph);
  155. }
  156. changed = true;
  157. }
  158. UpdateAssignedSubgraphs(context);
  159. UpdateReusedSubgraphs();
  160. return changed ? SUCCESS : NOT_CHANGED;
  161. }
  162. bool AssignByDependencyPass::IsHeadNodeExceeded(const vector<SubgraphPtr> &subgraphs) const {
  163. size_t aicpu_node_num = 0;
  164. for (const SubgraphPtr &subgraph : subgraphs) {
  165. if (subgraph->engine_conf.id == kAICPUEngineName && !HasNonConstInputNode(*subgraph)) {
  166. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  167. auto compute_graph = subgraph_info.GetSubGraph();
  168. aicpu_node_num += compute_graph->GetDirectNode().size() - subgraph_info.GetPld2EndMap().size() -
  169. subgraph_info.GetEnd2PldMap().size();
  170. if (aicpu_node_num > kHeadNodeMaxNum) {
  171. GELOGI("aicpu_node_num, %zu", aicpu_node_num);
  172. return true;
  173. }
  174. }
  175. }
  176. return false;
  177. }
  178. void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs,
  179. map<NodePtr, SubgraphPtr> &end_subgraph_map) {
  180. for (const auto &subgraph : subgraphs) {
  181. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  182. for (const auto &item : subgraph_info.GetEnd2PldMap()) {
  183. end_subgraph_map.emplace(item.first, subgraph);
  184. }
  185. }
  186. }
  187. void AssignByDependencyPass::InitPldSubgraphMap(const vector<SubgraphPtr> &subgraphs,
  188. map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  189. for (const auto &subgraph : subgraphs) {
  190. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  191. for (const auto &item : subgraph_info.GetPld2EndMap()) {
  192. pld_subgraph_map.emplace(item.first, subgraph);
  193. }
  194. }
  195. }
  196. bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph,
  197. const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  198. if ((subgraph == nullptr) || (pred_subgraph == nullptr)) {
  199. return false;
  200. }
  201. if (subgraph->engine_conf.scheduler_id != pred_subgraph->engine_conf.scheduler_id) {
  202. return false;
  203. }
  204. if (IsEngineIndependent(*pred_subgraph) || HasStreamLabel(*pred_subgraph)) {
  205. return false;
  206. }
  207. // If the engine of the predecessor subgraph is the same as the other successor subgraphs, the stream is not reused.
  208. for (const auto &end_pld_pair : pred_subgraph->subgraph_info.GetEnd2PldMap()) {
  209. auto iter = pld_subgraph_map.find(end_pld_pair.second);
  210. if (iter != pld_subgraph_map.end()) {
  211. const SubgraphPtr &pred_subgraph_succ = iter->second;
  212. if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) {
  213. return false;
  214. }
  215. }
  216. }
  217. if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) {
  218. return true;
  219. }
  220. if ((pred_subgraph->reused_subgraph != nullptr) &&
  221. (pred_subgraph->reused_subgraph->engine_conf.id == subgraph->engine_conf.id)) {
  222. return true;
  223. }
  224. return false;
  225. }
  226. LogicalStreamPass::SubgraphPtr AssignByDependencyPass::GetReusableSubgraph(
  227. const SubgraphPtr &subgraph, const map<NodePtr, SubgraphPtr> &end_subgraph_map,
  228. const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  229. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  230. for (const auto &pld_2_end : subgraph_info.GetPld2EndMap()) {
  231. const NodePtr &peer_end = pld_2_end.second;
  232. auto iter = end_subgraph_map.find(peer_end);
  233. if (iter != end_subgraph_map.end()) {
  234. const SubgraphPtr &pred_subgraph = iter->second;
  235. if (CouldReuse(subgraph, pred_subgraph, pld_subgraph_map)) {
  236. return pred_subgraph;
  237. }
  238. }
  239. }
  240. return nullptr;
  241. }
  242. int64_t AssignByDependencyPass::AssignNewStream(SubgraphPtr subgraph) {
  243. const string &engine_name = subgraph->engine_conf.id;
  244. int64_t max_parallel_num = subgraph->max_parallel_num;
  245. int64_t stream_id = 0;
  246. auto next_iter = engine_next_streams_.find(engine_name);
  247. if (next_iter != engine_next_streams_.end()) {
  248. stream_id = next_iter->second;
  249. }
  250. if (stream_id >= max_parallel_num) {
  251. stream_id = 0;
  252. }
  253. subgraph->stream_id = stream_id;
  254. engine_next_streams_[engine_name] = stream_id + 1;
  255. assigned_subgraphs_.emplace_back(subgraph);
  256. if ((stream_id + 1) > engine_stream_num_[engine_name]) {
  257. engine_stream_num_[engine_name] = stream_id + 1;
  258. }
  259. GELOGI("Subgraph %s assigns new temp stream %ld (engine: %s).", subgraph->name.c_str(), stream_id,
  260. engine_name.c_str());
  261. return stream_id;
  262. }
  263. void AssignByDependencyPass::UpdateAssignedSubgraphs(Context &context) {
  264. // If the default stream is valid, the first assigned stream will reuse the default stream id
  265. // and other streams use new id. To ensure that the id of the new stream is continuous,
  266. // we first subtract one from next_stream.
  267. int64_t to_be_updated_stream = kInvalidStream;
  268. if (context.default_stream != kInvalidStream) {
  269. context.next_stream--;
  270. to_be_updated_stream = context.next_stream;
  271. }
  272. // Update the starting stream id for each engine.
  273. int64_t &next_stream = context.next_stream;
  274. map<string, int64_t> engine_start_streams;
  275. for (const auto &item : engine_stream_num_) {
  276. int64_t stream_count = item.second;
  277. engine_start_streams[item.first] = next_stream;
  278. next_stream += stream_count;
  279. }
  280. // Update the subgraph streams assigned by engine.
  281. for (auto &subgraph : assigned_subgraphs_) {
  282. subgraph->stream_id += engine_start_streams[subgraph->engine_conf.id];
  283. if (subgraph->stream_id == to_be_updated_stream) {
  284. subgraph->stream_id = context.default_stream;
  285. GELOGI("Subgraph %s of engine %s reuses default stream %ld.", subgraph->name.c_str(),
  286. subgraph->engine_conf.id.c_str(), context.default_stream);
  287. } else {
  288. GELOGI("Stream of subgraph %s has been updated to %ld.", subgraph->name.c_str(), subgraph->stream_id);
  289. }
  290. }
  291. }
  292. void AssignByDependencyPass::UpdateReusedSubgraphs() {
  293. // Update streams for the subgraphs of reusing stream.
  294. for (const auto &item : reused_subgraphs_) {
  295. auto &cur_subgraph = item.first;
  296. auto &reused_graph = item.second;
  297. cur_subgraph->stream_id = reused_graph->stream_id;
  298. GELOGI("Stream of subgraph %s has been updated to %ld.", cur_subgraph->name.c_str(), cur_subgraph->stream_id);
  299. }
  300. }
  301. Status SingleStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  302. // context.default_stream can be kInvalidStream only when graph is the root graph.
  303. int64_t new_stream = context.default_stream;
  304. if (new_stream == kInvalidStream) {
  305. new_stream = context.next_stream;
  306. ++context.next_stream;
  307. }
  308. for (const SubgraphPtr &subgraph : subgraphs) {
  309. if (!HasAssignedStream(*subgraph)) {
  310. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  311. if (!stream_label.empty()) {
  312. GELOGE(INTERNAL_ERROR, "Stream labels are not supported (subgraph: %s, stream label: %s).",
  313. subgraph->name.c_str(), stream_label.c_str());
  314. return INTERNAL_ERROR;
  315. }
  316. subgraph->stream_id = new_stream;
  317. }
  318. }
  319. return SUCCESS;
  320. }
  321. Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  322. // Check if all subgraphs have been assigned a stream.
  323. for (const SubgraphPtr &subgraph : subgraphs) {
  324. const string &engine_name = subgraph->engine_conf.id;
  325. if (!IsEngineSkip(*subgraph) && !HasAssignedStream(*subgraph)) {
  326. GELOGE(INTERNAL_ERROR, "Subgraph %s has not yet been assigned a stream (engine: %s).", subgraph->name.c_str(),
  327. engine_name.c_str());
  328. return INTERNAL_ERROR;
  329. } else {
  330. GELOGI("Subgraph %s is assigned stream %ld (engine: %s).", subgraph->name.c_str(), subgraph->stream_id,
  331. engine_name.c_str());
  332. }
  333. }
  334. // Init the stream id of node.
  335. for (NodePtr &node : graph->GetDirectNode()) {
  336. GE_CHECK_NOTNULL(node->GetOpDesc());
  337. node->GetOpDesc()->SetStreamId(kInvalidStream);
  338. }
  339. // Set the stream id of the subgraph to the node.
  340. for (const SubgraphPtr &subgraph : subgraphs) {
  341. int64_t stream_id = subgraph->stream_id;
  342. const string &engine_name = subgraph->engine_conf.id;
  343. auto compute_graph = subgraph->subgraph_info.GetSubGraph();
  344. for (NodePtr &node : compute_graph->GetDirectNode()) {
  345. GE_CHECK_NOTNULL(node->GetOpDesc());
  346. if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) {
  347. GELOGD("Node %s of type %s in subgraph %s doesn't need to assign a stream (engine: %s).",
  348. node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str());
  349. } else {
  350. node->GetOpDesc()->SetStreamId(stream_id);
  351. GELOGD("Node %s of type %s in subgraph %s is assigned stream %ld (engine: %s).", node->GetName().c_str(),
  352. node->GetType().c_str(), subgraph->name.c_str(), stream_id, engine_name.c_str());
  353. }
  354. }
  355. }
  356. // Update stream id for nodes belong to skipped engine subgraph
  357. GE_CHK_STATUS_RET(UpdateForSkippedEngine(graph, subgraphs));
  358. return SUCCESS;
  359. }
  360. int64_t NodeStreamUpdatePass::GetSingleInoutStream(const NodePtr &node) const {
  361. set<int64_t> stream_ids;
  362. for (const auto &in_node : node->GetInAllNodes()) {
  363. GE_CHECK_NOTNULL_EXEC(in_node->GetOpDesc(), return kInvalidStream);
  364. int64_t stream_id = in_node->GetOpDesc()->GetStreamId();
  365. if (stream_id != kInvalidStream) {
  366. stream_ids.insert(stream_id);
  367. }
  368. }
  369. for (const auto &out_node : node->GetOutAllNodes()) {
  370. GE_CHECK_NOTNULL_EXEC(out_node->GetOpDesc(), return kInvalidStream);
  371. int64_t stream_id = out_node->GetOpDesc()->GetStreamId();
  372. if (stream_id != kInvalidStream) {
  373. stream_ids.insert(stream_id);
  374. }
  375. }
  376. if (stream_ids.size() == 1) {
  377. int64_t stream_id = *(stream_ids.begin());
  378. GELOGI("The stream of all input and output nodes of node %s (type: %s) is %ld.", node->GetName().c_str(),
  379. node->GetType().c_str(), stream_id);
  380. return stream_id;
  381. }
  382. return kInvalidStream;
  383. }
  384. Status NodeStreamUpdatePass::UpdateForSkippedEngine(const ComputeGraphPtr &graph,
  385. const vector<SubgraphPtr> &subgraphs) {
  386. set<OpDescPtr> ops_without_label;
  387. // Check if subgraph is engine skipped and without stream label or not
  388. for (const SubgraphPtr &subgraph : subgraphs) {
  389. if (IsEngineSkip(*subgraph)) {
  390. auto compute_graph = subgraph->subgraph_info.GetSubGraph();
  391. for (NodePtr &node : compute_graph->GetDirectNode()) {
  392. auto op_desc = node->GetOpDesc();
  393. GE_CHECK_NOTNULL(op_desc);
  394. auto stream_id = op_desc->GetStreamId();
  395. if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) {
  396. ops_without_label.emplace(op_desc);
  397. }
  398. }
  399. }
  400. }
  401. // Try reassign the stream id
  402. for (ge::NodePtr &node : graph->GetDirectNode()) {
  403. auto op_desc = node->GetOpDesc();
  404. GE_CHECK_NOTNULL(op_desc);
  405. int64_t stream_id = op_desc->GetStreamId();
  406. if (ops_without_label.find(op_desc) != ops_without_label.end()) {
  407. if (AreAllPredStreamsInvalid(node) && op_desc->GetSubgraphInstanceNames().empty()) {
  408. op_desc->SetStreamId(kInvalidStream);
  409. GELOGI("Node %s of type %s reassign to stream %ld from stream %ld.", node->GetName().c_str(),
  410. node->GetType().c_str(), kInvalidStream, stream_id);
  411. } else if (!node->GetOutAllNodes().empty()) {
  412. int64_t inout_stream = GetSingleInoutStream(node);
  413. if (inout_stream != kInvalidStream) {
  414. op_desc->SetStreamId(inout_stream);
  415. GELOGI("Node %s of type %s reassign to stream %ld from stream %ld.", node->GetName().c_str(),
  416. node->GetType().c_str(), inout_stream, stream_id);
  417. }
  418. }
  419. }
  420. }
  421. return SUCCESS;
  422. }
  423. bool NodeStreamUpdatePass::AreAllPredStreamsInvalid(const NodePtr &node) const {
  424. for (const auto &pre_node : node->GetInAllNodes()) {
  425. auto pre_node_desc = pre_node->GetOpDesc();
  426. if (pre_node_desc != nullptr) {
  427. int64_t stream_id = pre_node_desc->GetStreamId();
  428. if (stream_id != kInvalidStream) {
  429. return false;
  430. }
  431. }
  432. }
  433. return true;
  434. }
  435. Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  436. if (!context.enable_hcom_parallel) {
  437. return NOT_CHANGED;
  438. }
  439. GELOGI("AllReduceParallelPass is enabled.");
  440. GE_DUMP(graph, "BeforeAllReduceParallel");
  441. // All successors of HcomAllReduce.
  442. set<NodePtr> all_reduce_succs;
  443. for (const NodePtr &node : graph->GetDirectNode()) {
  444. if ((node->GetType() != HCOMALLREDUCE && node->GetType() != HVDCALLBACKALLREDUCE) ||
  445. node->GetInDataNodes().size() <= 1) {
  446. continue;
  447. }
  448. string reduce_stream_label;
  449. GE_CHECK_NOTNULL(node->GetOpDesc());
  450. (void)AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, reduce_stream_label);
  451. set<NodePtr> cur_nodes = {node};
  452. while (!cur_nodes.empty()) {
  453. set<NodePtr> all_out_data_nodes;
  454. for (auto &curr_node : cur_nodes) {
  455. for (const NodePtr &out_node : curr_node->GetOutDataNodes()) {
  456. string out_stream_label;
  457. GE_CHECK_NOTNULL(out_node->GetOpDesc());
  458. (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, out_stream_label);
  459. // normally, Allreduce do not have streamLabel. when in horovod scenario Allreduce will have streamLabel
  460. bool isSuccessorParallel =
  461. (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty());
  462. if (isSuccessorParallel) {
  463. all_reduce_succs.emplace(out_node);
  464. all_out_data_nodes.emplace(out_node);
  465. }
  466. }
  467. }
  468. cur_nodes = all_out_data_nodes;
  469. }
  470. }
  471. map<int64_t, int64_t> old_stream_to_new;
  472. for (const NodePtr &node : all_reduce_succs) {
  473. GE_CHECK_NOTNULL(node->GetOpDesc());
  474. auto old_stream = node->GetOpDesc()->GetStreamId();
  475. if (old_stream != kInvalidStream) {
  476. int64_t new_stream = kInvalidStream;
  477. auto iter = old_stream_to_new.find(old_stream);
  478. if (iter != old_stream_to_new.end()) {
  479. new_stream = iter->second;
  480. } else {
  481. new_stream = context.next_stream;
  482. context.next_stream++;
  483. old_stream_to_new.emplace(old_stream, new_stream);
  484. }
  485. GELOGI("Stream of node %s has been updated from %ld to %ld.", node->GetName().c_str(), old_stream, new_stream);
  486. node->GetOpDesc()->SetStreamId(new_stream);
  487. }
  488. }
  489. return !all_reduce_succs.empty() ? SUCCESS : NOT_CHANGED;
  490. }
  491. LogicalStreamAllocator::LogicalStreamAllocator(const map<string, SchedulerConf> &scheduler_confs,
  492. const map<string, int> &max_parallel_num)
  493. : scheduler_confs_(scheduler_confs), max_parallel_num_(max_parallel_num) {}
  494. void LogicalStreamAllocator::EnableSingleStream(bool enable) { context_.enable_single_stream = enable; }
  495. void LogicalStreamAllocator::EnableHcomParallel(bool enable) { context_.enable_hcom_parallel = enable; }
  496. Status LogicalStreamAllocator::Assign(const ComputeGraphPtr &root_graph, const Graph2SubGraphInfoList &subgraph_map,
  497. int64_t &stream_num) {
  498. GE_CHECK_NOTNULL(root_graph);
  499. map<string, EngineConfPtr> engine_confs;
  500. GE_TIMESTAMP_START(InitEngineConfs);
  501. for (const auto &item : scheduler_confs_) {
  502. const SchedulerConf &scheduler = item.second;
  503. for (const auto &engine_pair : scheduler.cal_engines) {
  504. EngineConfPtr engine_conf = engine_pair.second;
  505. if (engine_conf != nullptr) {
  506. engine_confs[engine_pair.first] = engine_conf;
  507. }
  508. }
  509. }
  510. GE_TIMESTAMP_END(InitEngineConfs, "GraphBuilder::AssignStreamInitEngineConfs");
  511. Status status = DoAssign(root_graph, subgraph_map, engine_confs);
  512. if (status != SUCCESS) {
  513. GELOGE(status, "Assign streams failed.");
  514. return status;
  515. }
  516. vector<ComputeGraphPtr> subgraphs = root_graph->GetAllSubgraphs();
  517. for (const ComputeGraphPtr &subgraph : subgraphs) {
  518. Status status = DoAssign(subgraph, subgraph_map, engine_confs);
  519. if (status != SUCCESS) {
  520. GELOGE(status, "Assign streams failed.");
  521. return status;
  522. }
  523. }
  524. RefreshContinuousStreams(root_graph);
  525. stream_num = context_.next_stream;
  526. GELOGI("Assigned logical stream num: %ld.", stream_num);
  527. return SUCCESS;
  528. }
  529. Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Graph2SubGraphInfoList &subgraph_map,
  530. const map<string, EngineConfPtr> &engine_confs) {
  531. GE_CHECK_NOTNULL(graph);
  532. NodePtr parent_node = graph->GetParentNode();
  533. if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) {
  534. context_.default_stream = kInvalidStream;
  535. } else {
  536. context_.default_stream = parent_node->GetOpDesc()->GetStreamId();
  537. }
  538. auto iter = subgraph_map.find(graph);
  539. if (iter == subgraph_map.end()) {
  540. GELOGE(FAILED, "Graph %s not found.", graph->GetName().c_str());
  541. return FAILED;
  542. }
  543. const vector<SubGraphInfoPtr> &subgraph_info_list = iter->second;
  544. vector<SubgraphPtr> subgraphs;
  545. GE_TIMESTAMP_START(ConvertSubgraphs);
  546. Status status = ConvertSubgraphs(subgraph_info_list, engine_confs, subgraphs);
  547. GE_TIMESTAMP_END(ConvertSubgraphs, "GraphBuilder::AssignStreamConvertSubgraphs");
  548. if (status != SUCCESS) {
  549. GELOGE(status, "Create subgraphs failed.");
  550. return status;
  551. }
  552. GELOGI("Subgraphs of graph %s:", graph->GetName().c_str());
  553. for (const auto &subgraph : subgraphs) {
  554. if (subgraph != nullptr) {
  555. GELOGI("subgraph: %s", subgraph->name.c_str());
  556. }
  557. }
  558. return RunPasses(graph, subgraphs);
  559. }
  560. Status LogicalStreamAllocator::ConvertSubgraphs(const vector<SubGraphInfoPtr> &subgraph_infos,
  561. const map<string, EngineConfPtr> &engine_confs,
  562. vector<SubgraphPtr> &subgraphs) {
  563. for (auto &subgraph_info : subgraph_infos) {
  564. GE_CHECK_NOTNULL(subgraph_info);
  565. string subgraph_name;
  566. ComputeGraphPtr computer_graph = subgraph_info->GetSubGraph();
  567. if (computer_graph != nullptr) {
  568. subgraph_name = computer_graph->GetName();
  569. }
  570. const string &engine_name = subgraph_info->GetEngineName();
  571. auto engine_conf_iter = engine_confs.find(engine_name);
  572. if ((engine_conf_iter == engine_confs.end()) || (engine_conf_iter->second == nullptr)) {
  573. GELOGE(INTERNAL_ERROR, "Engine conf of subgraph %s not found (engine name: %s).", subgraph_name.c_str(),
  574. engine_name.c_str());
  575. return INTERNAL_ERROR;
  576. }
  577. SubgraphPtr subgraph = MakeShared<Subgraph>(*subgraph_info, *engine_conf_iter->second);
  578. GE_CHECK_NOTNULL(subgraph);
  579. subgraph->name = subgraph_name;
  580. auto parallel_iter = max_parallel_num_.find(engine_name);
  581. if (parallel_iter != max_parallel_num_.end()) {
  582. subgraph->max_parallel_num = parallel_iter->second;
  583. }
  584. subgraphs.emplace_back(subgraph);
  585. }
  586. return SUCCESS;
  587. }
  588. Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vector<SubgraphPtr> &subgraphs) {
  589. vector<LogicalStreamPassPtr> passes;
  590. if (context_.enable_single_stream) {
  591. passes.emplace_back(MakeShared<SingleStreamPass>());
  592. passes.emplace_back(MakeShared<NodeStreamUpdatePass>());
  593. } else {
  594. passes.emplace_back(MakeShared<AssignByLabelPass>());
  595. passes.emplace_back(MakeShared<IndependentStreamPass>());
  596. passes.emplace_back(MakeShared<AssignByDependencyPass>());
  597. passes.emplace_back(MakeShared<NodeStreamUpdatePass>());
  598. passes.emplace_back(MakeShared<AllReduceParallelPass>());
  599. }
  600. for (auto &pass : passes) {
  601. GE_CHECK_NOTNULL(pass);
  602. Status status = pass->Run(graph, subgraphs, context_);
  603. if (status == SUCCESS) {
  604. GELOGI("Stream pass %s return SUCCESS.", pass->GetName().c_str());
  605. } else if (status == NOT_CHANGED) {
  606. GELOGI("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str());
  607. } else {
  608. GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str());
  609. return status;
  610. }
  611. }
  612. return SUCCESS;
  613. }
  614. void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &graph) {
  615. int64_t stream_num = context_.next_stream;
  616. vector<bool> stream_has_node(stream_num);
  617. for (const NodePtr &node : graph->GetAllNodes()) {
  618. if (node != nullptr) {
  619. auto op_desc = node->GetOpDesc();
  620. if (op_desc != nullptr) {
  621. int64_t stream_id = op_desc->GetStreamId();
  622. if (stream_id != kInvalidStream && stream_id < stream_num) {
  623. stream_has_node[stream_id] = true;
  624. }
  625. }
  626. }
  627. }
  628. context_.next_stream = 0;
  629. vector<int64_t> old_to_new_streams(stream_num, kInvalidStream);
  630. for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) {
  631. if (stream_has_node[old_stream]) {
  632. old_to_new_streams[old_stream] = context_.next_stream;
  633. ++context_.next_stream;
  634. }
  635. }
  636. for (const NodePtr &node : graph->GetAllNodes()) {
  637. auto op_desc = node->GetOpDesc();
  638. if (op_desc != nullptr) {
  639. int64_t stream_id = op_desc->GetStreamId();
  640. if (stream_id != kInvalidStream && stream_id < stream_num) {
  641. op_desc->SetStreamId(old_to_new_streams[stream_id]);
  642. }
  643. }
  644. }
  645. }
  646. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示