You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

logical_stream_allocator.cc 30 kB

5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/logical_stream_allocator.h"
  17. #include <queue>
  18. #include "common/ge/ge_util.h"
  19. #include "framework/common/debug/ge_log.h"
  20. #include "framework/common/fmk_error_codes.h"
  21. #include "framework/common/types.h"
  22. #include "graph/debug/ge_attr_define.h"
  23. #include "graph/utils/graph_utils.h"
  24. #include "graph/common/ge_call_wrapper.h"
  25. using std::map;
  26. using std::set;
  27. using std::string;
  28. using std::vector;
  29. using std::queue;
  30. namespace ge {
  31. LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}
  32. const string &LogicalStreamPass::GetName() const {
  33. return name_;
  34. }
  35. bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const {
  36. return subgraph.engine_conf.skip_assign_stream;
  37. }
  38. bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const {
  39. return subgraph.engine_conf.attach;
  40. }
  41. bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const {
  42. return subgraph.engine_conf.independent;
  43. }
  44. bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const {
  45. return !subgraph.subgraph_info.GetStreamLabel().empty();
  46. }
  47. bool LogicalStreamPass::HasAssignedStream(const Subgraph &subgraph) const {
  48. return subgraph.stream_id != kInvalidStream;
  49. }
  50. Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  51. bool changed = false;
  52. int64_t &next_stream = context.next_stream;
  53. map<string, int64_t> label_streams;
  54. for (const SubgraphPtr &subgraph : subgraphs) {
  55. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  56. if (!stream_label.empty()) {
  57. // Subgraphs of the same stream_label are assigned to the same stream,
  58. // and different stream_labels are assigned new streams.
  59. auto iter = label_streams.find(stream_label);
  60. if (iter == label_streams.end()) {
  61. subgraph->stream_id = next_stream;
  62. GELOGI("[Assign][NewStreamId] %ld for label %s.", next_stream, stream_label.c_str());
  63. label_streams.emplace(stream_label, next_stream);
  64. next_stream++;
  65. } else {
  66. subgraph->stream_id = iter->second;
  67. }
  68. changed = true;
  69. }
  70. }
  71. return changed ? SUCCESS : NOT_CHANGED;
  72. }
  73. Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  74. bool changed = false;
  75. int64_t &next_stream = context.next_stream;
  76. // <engine, <label, stream>>
  77. map<string, map<string, int64_t>> engine_streams;
  78. for (const SubgraphPtr &subgraph : subgraphs) {
  79. if (!IsEngineIndependent(*subgraph)) {
  80. continue;
  81. }
  82. const string &engine = subgraph->engine_conf.id;
  83. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  84. auto &label_streams = engine_streams[engine];
  85. auto iter = label_streams.find(stream_label);
  86. if (iter == label_streams.end()) {
  87. subgraph->stream_id = next_stream;
  88. GELOGI("[Assign][NewStreamId:independent] %ld for engine %s (label: %s).", next_stream, engine.c_str(),
  89. stream_label.c_str());
  90. label_streams.emplace(stream_label, next_stream);
  91. next_stream++;
  92. } else {
  93. subgraph->stream_id = iter->second;
  94. }
  95. changed = true;
  96. }
  97. return changed ? SUCCESS : NOT_CHANGED;
  98. }
  99. Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  100. bool changed = false;
  101. map<NodePtr, SubgraphPtr> end_subgraph_map;
  102. map<NodePtr, SubgraphPtr> pld_subgraph_map;
  103. InitEndSubgraphMap(subgraphs, end_subgraph_map);
  104. InitPldSubgraphMap(subgraphs, pld_subgraph_map);
  105. for (const SubgraphPtr &subgraph : subgraphs) {
  106. if (HasAssignedStream(*subgraph)) {
  107. continue;
  108. }
  109. SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map);
  110. if (reusable_subgraph == nullptr) {
  111. (void)AssignNewStream(subgraph);
  112. } else {
  113. if (HasAssignedStream(*reusable_subgraph)) {
  114. subgraph->stream_id = reusable_subgraph->stream_id;
  115. } else {
  116. int64_t stream_id = AssignNewStream(reusable_subgraph);
  117. subgraph->stream_id = stream_id;
  118. GELOGI("[Assign][NewStreamId] %ld for Reusable subgraph %s cause has not been assigned before.",
  119. stream_id, reusable_subgraph->name.c_str());
  120. }
  121. if (reusable_subgraph->reused_subgraph != nullptr) {
  122. reusable_subgraph = reusable_subgraph->reused_subgraph;
  123. }
  124. subgraph->reused_subgraph = reusable_subgraph;
  125. reused_subgraphs_.emplace_back(subgraph, reusable_subgraph);
  126. GELOGI("[Reuse][Stream]Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.",
  127. subgraph->name.c_str(),
  128. subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(),
  129. reusable_subgraph->engine_conf.id.c_str());
  130. }
  131. changed = true;
  132. }
  133. UpdateAssignedSubgraphs(context);
  134. UpdateReusedSubgraphs();
  135. return changed ? SUCCESS : NOT_CHANGED;
  136. }
  137. void AssignByDependencyPass::InitEndSubgraphMap(const vector<SubgraphPtr> &subgraphs,
  138. map<NodePtr, SubgraphPtr> &end_subgraph_map) {
  139. for (const auto &subgraph : subgraphs) {
  140. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  141. for (const auto &item : subgraph_info.GetEnd2PldMap()) {
  142. end_subgraph_map.emplace(item.first, subgraph);
  143. }
  144. }
  145. }
  146. void AssignByDependencyPass::InitPldSubgraphMap(const vector<SubgraphPtr> &subgraphs,
  147. map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  148. for (const auto &subgraph : subgraphs) {
  149. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  150. for (const auto &item : subgraph_info.GetPld2EndMap()) {
  151. pld_subgraph_map.emplace(item.first, subgraph);
  152. }
  153. }
  154. }
  155. bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const SubgraphPtr &pred_subgraph,
  156. const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  157. if ((subgraph == nullptr) || (pred_subgraph == nullptr)) {
  158. return false;
  159. }
  160. if (subgraph->engine_conf.scheduler_id != pred_subgraph->engine_conf.scheduler_id) {
  161. return false;
  162. }
  163. if (IsEngineIndependent(*pred_subgraph) || HasStreamLabel(*pred_subgraph)) {
  164. return false;
  165. }
  166. // If the engine of the predecessor subgraph is the same as the other successor subgraphs, the stream is not reused.
  167. for (const auto &end_pld_pair : pred_subgraph->subgraph_info.GetEnd2PldMap()) {
  168. auto iter = pld_subgraph_map.find(end_pld_pair.second);
  169. if (iter != pld_subgraph_map.end()) {
  170. const SubgraphPtr &pred_subgraph_succ = iter->second;
  171. if ((pred_subgraph_succ != subgraph) &&
  172. (pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) {
  173. return false;
  174. }
  175. }
  176. }
  177. if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) ||
  178. IsEngineAttach(*subgraph)) {
  179. return true;
  180. }
  181. if ((pred_subgraph->reused_subgraph != nullptr) &&
  182. (pred_subgraph->reused_subgraph->engine_conf.id == subgraph->engine_conf.id)) {
  183. return true;
  184. }
  185. return false;
  186. }
  187. LogicalStreamPass::SubgraphPtr AssignByDependencyPass::GetReusableSubgraph(
  188. const SubgraphPtr &subgraph, const map<NodePtr, SubgraphPtr> &end_subgraph_map,
  189. const map<NodePtr, SubgraphPtr> &pld_subgraph_map) {
  190. const SubGraphInfo &subgraph_info = subgraph->subgraph_info;
  191. for (const auto &pld_2_end : subgraph_info.GetPld2EndMap()) {
  192. const NodePtr &peer_end = pld_2_end.second;
  193. auto iter = end_subgraph_map.find(peer_end);
  194. if (iter != end_subgraph_map.end()) {
  195. const SubgraphPtr &pred_subgraph = iter->second;
  196. if (CouldReuse(subgraph, pred_subgraph, pld_subgraph_map)) {
  197. return pred_subgraph;
  198. }
  199. }
  200. }
  201. return nullptr;
  202. }
  203. int64_t AssignByDependencyPass::AssignNewStream(SubgraphPtr subgraph) {
  204. const string &engine_name = subgraph->engine_conf.id;
  205. int64_t max_parallel_num = subgraph->max_parallel_num;
  206. int64_t stream_id = 0;
  207. auto next_iter = engine_next_streams_.find(engine_name);
  208. if (next_iter != engine_next_streams_.end()) {
  209. stream_id = next_iter->second;
  210. }
  211. if (stream_id >= max_parallel_num) {
  212. stream_id = 0;
  213. }
  214. subgraph->stream_id = stream_id;
  215. engine_next_streams_[engine_name] = stream_id + 1;
  216. assigned_subgraphs_.emplace_back(subgraph);
  217. if ((stream_id + 1) > engine_stream_num_[engine_name]) {
  218. engine_stream_num_[engine_name] = stream_id + 1;
  219. }
  220. GELOGI("[Assign][NewStreamId:temp]id:%ld for Subgraph %s (engine: %s).", stream_id, subgraph->name.c_str(),
  221. engine_name.c_str());
  222. return stream_id;
  223. }
  224. void AssignByDependencyPass::UpdateAssignedSubgraphs(Context &context) {
  225. // If the default stream is valid, the first assigned stream will reuse the default stream id
  226. // and other streams use new id. To ensure that the id of the new stream is continuous,
  227. // we first subtract one from next_stream.
  228. int64_t to_be_updated_stream = kInvalidStream;
  229. if (context.default_stream != kInvalidStream) {
  230. context.next_stream--;
  231. to_be_updated_stream = context.next_stream;
  232. }
  233. // Update the starting stream id for each engine.
  234. int64_t &next_stream = context.next_stream;
  235. map<string, int64_t> engine_start_streams;
  236. for (const auto &item : engine_stream_num_) {
  237. int64_t stream_count = item.second;
  238. engine_start_streams[item.first] = next_stream;
  239. next_stream += stream_count;
  240. }
  241. // Update the subgraph streams assigned by engine.
  242. for (auto &subgraph : assigned_subgraphs_) {
  243. subgraph->stream_id += engine_start_streams[subgraph->engine_conf.id];
  244. if (subgraph->stream_id == to_be_updated_stream) {
  245. subgraph->stream_id = context.default_stream;
  246. GELOGI("Subgraph %s of engine %s reuses default stream %ld.", subgraph->name.c_str(),
  247. subgraph->engine_conf.id.c_str(), context.default_stream);
  248. } else {
  249. GELOGI("[Update][StreamId]id:%ld for subgraph %s.", subgraph->stream_id, subgraph->name.c_str());
  250. }
  251. }
  252. }
  253. void AssignByDependencyPass::UpdateReusedSubgraphs() {
  254. // Update streams for the subgraphs of reusing stream.
  255. for (const auto &item : reused_subgraphs_) {
  256. auto &cur_subgraph = item.first;
  257. auto &reused_graph = item.second;
  258. cur_subgraph->stream_id = reused_graph->stream_id;
  259. GELOGI("[Update][StreamId]id:%ld for subgraph %s.", cur_subgraph->stream_id, cur_subgraph->name.c_str());
  260. }
  261. }
  262. Status SingleStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  263. // context.default_stream can be kInvalidStream only when graph is the root graph.
  264. int64_t new_stream = context.default_stream;
  265. if (new_stream == kInvalidStream) {
  266. new_stream = context.next_stream;
  267. ++context.next_stream;
  268. }
  269. for (const SubgraphPtr &subgraph : subgraphs) {
  270. if (!HasAssignedStream(*subgraph)) {
  271. const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
  272. if (!stream_label.empty()) {
  273. REPORT_INNER_ERROR("E19999", "Stream labels are not supported in SingleStream mode "
  274. "(subgraph: %s, stream label: %s)", subgraph->name.c_str(), stream_label.c_str());
  275. GELOGE(INTERNAL_ERROR, "[Get][Label] Stream labels are not supported (subgraph: %s, stream label: %s).",
  276. subgraph->name.c_str(), stream_label.c_str());
  277. return INTERNAL_ERROR;
  278. }
  279. subgraph->stream_id = new_stream;
  280. }
  281. }
  282. return SUCCESS;
  283. }
  284. Status NodeStreamUpdatePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  285. // Check if all subgraphs have been assigned a stream.
  286. for (const SubgraphPtr &subgraph : subgraphs) {
  287. const string &engine_name = subgraph->engine_conf.id;
  288. if (!IsEngineSkip(*subgraph) && !HasAssignedStream(*subgraph)) {
  289. REPORT_INNER_ERROR("E19999", "Subgraph %s has not yet been assigned a stream (engine: %s)",
  290. subgraph->name.c_str(), engine_name.c_str());
  291. GELOGE(INTERNAL_ERROR, "[Check][Param] Subgraph %s has not yet been assigned a stream (engine: %s).",
  292. subgraph->name.c_str(), engine_name.c_str());
  293. return INTERNAL_ERROR;
  294. } else {
  295. GELOGI("[Assign][StreamId] %ld for Subgraph %s (engine: %s).", subgraph->stream_id, subgraph->name.c_str(),
  296. engine_name.c_str());
  297. }
  298. }
  299. // Init the stream id of node.
  300. for (NodePtr &node : graph->GetDirectNode()) {
  301. GE_CHECK_NOTNULL(node->GetOpDesc());
  302. node->GetOpDesc()->SetStreamId(kInvalidStream);
  303. }
  304. // Set the stream id of the subgraph to the node.
  305. for (const SubgraphPtr &subgraph : subgraphs) {
  306. int64_t stream_id = subgraph->stream_id;
  307. const string &engine_name = subgraph->engine_conf.id;
  308. auto compute_graph = subgraph->subgraph_info.GetSubGraph();
  309. for (NodePtr &node : compute_graph->GetDirectNode()) {
  310. GE_CHECK_NOTNULL(node->GetOpDesc());
  311. if (node->GetOpDesc()->HasAttr(ATTR_NAME_RTS_LABEL_NODE)) {
  312. node->GetOpDesc()->SetStreamId(context.default_stream);
  313. GELOGD("Node %s of type %s in subgraph %s is assigned parent stream %ld (engine: %s).", node->GetName().c_str(),
  314. node->GetType().c_str(), subgraph->name.c_str(), context.default_stream, engine_name.c_str());
  315. } else if (IsEngineSkip(*subgraph) && node->GetInNodes().empty()) {
  316. GELOGD("[Skip][StreamIdAssign]Node %s of type %s in subgraph %s doesn't need (engine: %s).",
  317. node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str());
  318. } else {
  319. node->GetOpDesc()->SetStreamId(stream_id);
  320. GELOGD("[Assign][StreamId]id:%ld for Node %s of type %s in subgraph %s (engine: %s).", stream_id,
  321. node->GetName().c_str(), node->GetType().c_str(), subgraph->name.c_str(), engine_name.c_str());
  322. }
  323. }
  324. }
  325. return SUCCESS;
  326. }
  327. Status UpdateForParallelGroupPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  328. std::map<int, vector<OpDescPtr>> stream_op_map;
  329. for (const SubgraphPtr &subgraph : subgraphs) {
  330. auto compute_graph = subgraph->subgraph_info.GetSubGraph();
  331. for (const NodePtr &node : compute_graph->GetDirectNode()) {
  332. OpDescPtr op_desc = node->GetOpDesc();
  333. GE_CHECK_NOTNULL(op_desc);
  334. if (op_desc->HasAttr(ATTR_NAME_PARALLEL_GROUP)) {
  335. int64_t op_desc_stream_id = op_desc->GetStreamId();
  336. stream_op_map[op_desc_stream_id].push_back(op_desc);
  337. }
  338. }
  339. }
  340. for (const auto &itr : stream_op_map) {
  341. if (itr.first == kInvalidStream) {
  342. continue;
  343. }
  344. std::map<std::string, int64_t> group_2_stream_id;
  345. for (const auto &op_desc : itr.second) {
  346. std::string group_name;
  347. if (!AttrUtils::GetStr(op_desc, ATTR_NAME_PARALLEL_GROUP, group_name)) {
  348. GELOGE(FAILED, "[Get][Attr] ATTR_NAME_PARALLEL_GROUP of node %s failed.", op_desc->GetName().c_str());
  349. REPORT_INNER_ERROR("E19999", "Get node %s ATTR_NAME_PARALLEL_GROUP failed.", op_desc->GetName().c_str());
  350. return FAILED;
  351. }
  352. const auto &itr = group_2_stream_id.find(group_name);
  353. int64_t new_stream_id = kInvalidStream;
  354. int64_t old_stream_id = op_desc->GetStreamId();
  355. if (itr != group_2_stream_id.end()) {
  356. new_stream_id = itr->second;
  357. } else {
  358. new_stream_id = context.next_stream++;
  359. group_2_stream_id[group_name] = new_stream_id;
  360. }
  361. op_desc->SetStreamId(new_stream_id);
  362. GELOGD("Node %s assigned stream %ld from stream %ld.",
  363. op_desc->GetName().c_str(), new_stream_id, old_stream_id);
  364. }
  365. }
  366. return SUCCESS;
  367. }
  368. int64_t UpdateForSkippedEnginePass::GetSingleInoutStream(const NodePtr &node) const {
  369. set<int64_t> stream_ids;
  370. for (const auto &in_node : node->GetInAllNodes()) {
  371. GE_CHECK_NOTNULL_EXEC(in_node->GetOpDesc(), return kInvalidStream);
  372. int64_t stream_id = in_node->GetOpDesc()->GetStreamId();
  373. if (stream_id != kInvalidStream) {
  374. stream_ids.insert(stream_id);
  375. }
  376. }
  377. for (const auto &out_node : node->GetOutAllNodes()) {
  378. GE_CHECK_NOTNULL_EXEC(out_node->GetOpDesc(), return kInvalidStream);
  379. int64_t stream_id = out_node->GetOpDesc()->GetStreamId();
  380. if (stream_id != kInvalidStream) {
  381. stream_ids.insert(stream_id);
  382. }
  383. }
  384. if (stream_ids.size() == 1) {
  385. int64_t stream_id = *(stream_ids.begin());
  386. GELOGI("[Get][SingleStreamId]The stream of all input and output nodes of node %s (type: %s) is %ld.",
  387. node->GetName().c_str(), node->GetType().c_str(), stream_id);
  388. return stream_id;
  389. }
  390. return kInvalidStream;
  391. }
  392. Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  393. set<OpDescPtr> ops_without_label;
  394. // Check if subgraph is engine skipped and without stream label or not
  395. for (const SubgraphPtr &subgraph : subgraphs) {
  396. if (IsEngineSkip(*subgraph)) {
  397. auto compute_graph = subgraph->subgraph_info.GetSubGraph();
  398. for (NodePtr &node : compute_graph->GetDirectNode()) {
  399. auto op_desc = node->GetOpDesc();
  400. GE_CHECK_NOTNULL(op_desc);
  401. auto stream_id = op_desc->GetStreamId();
  402. if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) {
  403. ops_without_label.emplace(op_desc);
  404. }
  405. }
  406. }
  407. }
  408. // Try reassign the stream id
  409. for (ge::NodePtr &node : graph->GetDirectNode()) {
  410. auto op_desc = node->GetOpDesc();
  411. GE_CHECK_NOTNULL(op_desc);
  412. if (op_desc->HasAttr(ATTR_NAME_THREAD_SCOPE_ID)) {
  413. op_desc->SetStreamId(kInvalidStream);
  414. GELOGI("Ffts node %s of type %s reassign to invalid stream.", node->GetName().c_str(), node->GetType().c_str());
  415. continue;
  416. }
  417. int64_t stream_id = op_desc->GetStreamId();
  418. if (ops_without_label.find(op_desc) != ops_without_label.end()) {
  419. if (AreAllPredStreamsInvalid(node) && op_desc->GetSubgraphInstanceNames().empty()) {
  420. op_desc->SetStreamId(kInvalidStream);
  421. GELOGI("Node %s of type %s reassign to stream %ld from stream %ld.", node->GetName().c_str(),
  422. node->GetType().c_str(), kInvalidStream, stream_id);
  423. } else if (!node->GetOutAllNodes().empty()) {
  424. int64_t inout_stream = GetSingleInoutStream(node);
  425. if (inout_stream != kInvalidStream) {
  426. op_desc->SetStreamId(inout_stream);
  427. GELOGI("[Reassign][StreamId]%ld for Node %s of type %s from stream %ld.",
  428. inout_stream, node->GetName().c_str(), node->GetType().c_str(), stream_id);
  429. }
  430. }
  431. }
  432. }
  433. return SUCCESS;
  434. }
  435. bool UpdateForSkippedEnginePass::AreAllPredStreamsInvalid(const NodePtr &node) const {
  436. for (const auto &pre_node : node->GetInAllNodes()) {
  437. auto pre_node_desc = pre_node->GetOpDesc();
  438. if (pre_node_desc != nullptr) {
  439. int64_t stream_id = pre_node_desc->GetStreamId();
  440. if (stream_id != kInvalidStream) {
  441. return false;
  442. }
  443. }
  444. }
  445. return true;
  446. }
  447. Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &subgraphs, Context &context) {
  448. if (!context.enable_hcom_parallel) {
  449. return NOT_CHANGED;
  450. }
  451. GELOGI("[Run][AllReduceParallelPass] start");
  452. GE_DUMP(graph, "BeforeAllReduceParallel");
  453. // All successors of HcomAllReduce.
  454. set<NodePtr> all_reduce_succs;
  455. for (const NodePtr &node : graph->GetDirectNode()) {
  456. if (!IsHcomNode(node->GetType()) ||
  457. (node->GetInDataNodes().size() <= 1)) {
  458. continue;
  459. }
  460. string reduce_stream_label;
  461. GE_CHECK_NOTNULL(node->GetOpDesc());
  462. (void)AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, reduce_stream_label);
  463. set<NodePtr> cur_nodes = {node};
  464. while (!cur_nodes.empty()) {
  465. set<NodePtr> all_out_data_nodes;
  466. for (auto &curr_node : cur_nodes) {
  467. for (const NodePtr &out_node : curr_node->GetOutDataNodes()) {
  468. string out_stream_label;
  469. GE_CHECK_NOTNULL(out_node->GetOpDesc());
  470. (void)AttrUtils::GetStr(out_node->GetOpDesc(), ATTR_NAME_STREAM_LABEL, out_stream_label);
  471. // normally, Allreduce do not have streamLabel. when in horovod scenario Allreduce will have streamLabel
  472. bool isSuccessorParallel =
  473. (out_stream_label == reduce_stream_label) || (!reduce_stream_label.empty() && out_stream_label.empty());
  474. if (isSuccessorParallel) {
  475. all_reduce_succs.emplace(out_node);
  476. all_out_data_nodes.emplace(out_node);
  477. }
  478. }
  479. }
  480. cur_nodes = all_out_data_nodes;
  481. }
  482. }
  483. map<int64_t, int64_t> old_stream_to_new;
  484. for (const NodePtr &node : all_reduce_succs) {
  485. GE_CHECK_NOTNULL(node->GetOpDesc());
  486. auto old_stream = node->GetOpDesc()->GetStreamId();
  487. if (old_stream != kInvalidStream) {
  488. int64_t new_stream = kInvalidStream;
  489. auto iter = old_stream_to_new.find(old_stream);
  490. if (iter != old_stream_to_new.end()) {
  491. new_stream = iter->second;
  492. } else {
  493. new_stream = context.next_stream;
  494. context.next_stream++;
  495. old_stream_to_new.emplace(old_stream, new_stream);
  496. }
  497. if (!IsHcomNode(node->GetType())) {
  498. GELOGI("Stream of node %s has been updated from %ld to %ld.", node->GetName().c_str(), old_stream, new_stream);
  499. node->GetOpDesc()->SetStreamId(new_stream);
  500. }
  501. }
  502. }
  503. return !all_reduce_succs.empty() ? SUCCESS : NOT_CHANGED;
  504. }
  505. bool AllReduceParallelPass::IsHcomNode(const std::string& node_type) {
  506. return (node_type == HCOMALLREDUCE || node_type == HVDCALLBACKALLREDUCE);
  507. }
  508. LogicalStreamAllocator::LogicalStreamAllocator(const map<string, SchedulerConf> &scheduler_confs,
  509. const map<string, int> &max_parallel_num)
  510. : scheduler_confs_(scheduler_confs), max_parallel_num_(max_parallel_num) {}
  511. void LogicalStreamAllocator::EnableSingleStream(bool enable) { context_.enable_single_stream = enable; }
  512. void LogicalStreamAllocator::EnableHcomParallel(bool enable) { context_.enable_hcom_parallel = enable; }
  513. Status LogicalStreamAllocator::Assign(const ComputeGraphPtr &root_graph, const Graph2SubGraphInfoList &subgraph_map,
  514. int64_t &stream_num) {
  515. GE_CHECK_NOTNULL(root_graph);
  516. map<string, EngineConfPtr> engine_confs;
  517. GE_TIMESTAMP_START(InitEngineConfs);
  518. for (const auto &item : scheduler_confs_) {
  519. const SchedulerConf &scheduler = item.second;
  520. for (const auto &engine_pair : scheduler.cal_engines) {
  521. EngineConfPtr engine_conf = engine_pair.second;
  522. if (engine_conf != nullptr) {
  523. engine_confs[engine_pair.first] = engine_conf;
  524. }
  525. }
  526. }
  527. GE_TIMESTAMP_END(InitEngineConfs, "GraphBuilder::AssignStreamInitEngineConfs");
  528. Status status = DoAssign(root_graph, subgraph_map, engine_confs);
  529. if (status != SUCCESS) {
  530. GELOGE(status, "[Assign][Streams] failed, graph:%s.", root_graph->GetName().c_str());
  531. return status;
  532. }
  533. vector<ComputeGraphPtr> subgraphs = root_graph->GetAllSubgraphs();
  534. for (const ComputeGraphPtr &subgraph : subgraphs) {
  535. Status status = DoAssign(subgraph, subgraph_map, engine_confs);
  536. if (status != SUCCESS) {
  537. GELOGE(status, "[Assign][Streams] failed, graph:%s.", subgraph->GetName().c_str());
  538. return status;
  539. }
  540. }
  541. RefreshContinuousStreams(root_graph);
  542. stream_num = context_.next_stream;
  543. GELOGI("[Assign][LogicalStream] At last, stream num: %ld.", stream_num);
  544. return SUCCESS;
  545. }
  546. Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Graph2SubGraphInfoList &subgraph_map,
  547. const map<string, EngineConfPtr> &engine_confs) {
  548. GE_CHECK_NOTNULL(graph);
  549. NodePtr parent_node = graph->GetParentNode();
  550. if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
  551. context_.default_stream = kInvalidStream;
  552. } else {
  553. context_.default_stream = parent_node->GetOpDesc()->GetStreamId();
  554. }
  555. auto iter = subgraph_map.find(graph);
  556. if (iter == subgraph_map.end()) {
  557. REPORT_INNER_ERROR("E19999", "Graph %s not found in subgraph_map when do logical stream assign ",
  558. graph->GetName().c_str());
  559. GELOGE(FAILED, "[Check][Param] Graph %s not found.", graph->GetName().c_str());
  560. return FAILED;
  561. }
  562. const vector<SubGraphInfoPtr> &subgraph_info_list = iter->second;
  563. vector<SubgraphPtr> subgraphs;
  564. GE_TIMESTAMP_START(ConvertSubgraphs);
  565. Status status = ConvertSubgraphs(subgraph_info_list, engine_confs, subgraphs);
  566. GE_TIMESTAMP_END(ConvertSubgraphs, "GraphBuilder::AssignStreamConvertSubgraphs");
  567. if (status != SUCCESS) {
  568. GELOGE(status, "[Convert][SubGraphs] failed.");
  569. return status;
  570. }
  571. GELOGD("[Show][Subgraphs] in graph %s", graph->GetName().c_str());
  572. for (const auto &subgraph : subgraphs) {
  573. if (subgraph != nullptr) {
  574. GELOGD("subgraph: %s", subgraph->name.c_str());
  575. }
  576. }
  577. return RunPasses(graph, subgraphs);
  578. }
  579. Status LogicalStreamAllocator::ConvertSubgraphs(const vector<SubGraphInfoPtr> &subgraph_infos,
  580. const map<string, EngineConfPtr> &engine_confs,
  581. vector<SubgraphPtr> &subgraphs) {
  582. for (auto &subgraph_info : subgraph_infos) {
  583. GE_CHECK_NOTNULL(subgraph_info);
  584. string subgraph_name;
  585. ComputeGraphPtr computer_graph = subgraph_info->GetSubGraph();
  586. if (computer_graph != nullptr) {
  587. subgraph_name = computer_graph->GetName();
  588. }
  589. const string &engine_name = subgraph_info->GetEngineName();
  590. auto engine_conf_iter = engine_confs.find(engine_name);
  591. if ((engine_conf_iter == engine_confs.end()) || (engine_conf_iter->second == nullptr)) {
  592. REPORT_INNER_ERROR("E19999", "Engine conf of subgraph %s not found (engine name: %s)",
  593. subgraph_name.c_str(), engine_name.c_str());
  594. GELOGE(INTERNAL_ERROR, "[Check][Param] Engine conf of subgraph %s not found (engine name: %s).",
  595. subgraph_name.c_str(), engine_name.c_str());
  596. return INTERNAL_ERROR;
  597. }
  598. SubgraphPtr subgraph = MakeShared<Subgraph>(*subgraph_info, *engine_conf_iter->second);
  599. GE_CHECK_NOTNULL(subgraph);
  600. subgraph->name = subgraph_name;
  601. auto parallel_iter = max_parallel_num_.find(engine_name);
  602. if (parallel_iter != max_parallel_num_.end()) {
  603. subgraph->max_parallel_num = parallel_iter->second;
  604. }
  605. subgraphs.emplace_back(subgraph);
  606. }
  607. return SUCCESS;
  608. }
  609. Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vector<SubgraphPtr> &subgraphs) {
  610. vector<LogicalStreamPassPtr> passes;
  611. if (context_.enable_single_stream) {
  612. passes.emplace_back(MakeShared<SingleStreamPass>());
  613. passes.emplace_back(MakeShared<NodeStreamUpdatePass>());
  614. passes.emplace_back(MakeShared<UpdateForSkippedEnginePass>());
  615. } else {
  616. passes.emplace_back(MakeShared<AssignByLabelPass>());
  617. passes.emplace_back(MakeShared<IndependentStreamPass>());
  618. passes.emplace_back(MakeShared<AssignByDependencyPass>());
  619. passes.emplace_back(MakeShared<NodeStreamUpdatePass>());
  620. passes.emplace_back(MakeShared<UpdateForParallelGroupPass>());
  621. passes.emplace_back(MakeShared<AllReduceParallelPass>());
  622. passes.emplace_back(MakeShared<UpdateForSkippedEnginePass>());
  623. }
  624. for (auto &pass : passes) {
  625. GE_CHECK_NOTNULL(pass);
  626. Status status = pass->Run(graph, subgraphs, context_);
  627. if (status == SUCCESS) {
  628. GELOGD("[Show][Status]Stream pass %s return SUCCESS.", pass->GetName().c_str());
  629. } else if (status == NOT_CHANGED) {
  630. GELOGD("[Show][Status]Stream pass %s return NOT_CHANGED.", pass->GetName().c_str());
  631. } else {
  632. REPORT_CALL_ERROR("E19999", "Stream pass %s run failed.", pass->GetName().c_str());
  633. GELOGE(status, "[Call][Run] Stream pass %s failed.", pass->GetName().c_str());
  634. return status;
  635. }
  636. }
  637. return SUCCESS;
  638. }
  639. void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &graph) {
  640. int64_t stream_num = context_.next_stream;
  641. vector<bool> stream_has_node(stream_num);
  642. for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
  643. if (node != nullptr) {
  644. auto op_desc = node->GetOpDesc();
  645. if (op_desc != nullptr) {
  646. int64_t stream_id = op_desc->GetStreamId();
  647. if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
  648. stream_has_node[stream_id] = true;
  649. }
  650. }
  651. }
  652. }
  653. context_.next_stream = 0;
  654. vector<int64_t> old_to_new_streams(stream_num, kInvalidStream);
  655. for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) {
  656. if (stream_has_node[old_stream]) {
  657. old_to_new_streams[old_stream] = context_.next_stream;
  658. context_.next_stream++;
  659. }
  660. }
  661. for (const NodePtr &node : graph->GetNodes(graph->GetGraphUnknownFlag())) {
  662. auto op_desc = node->GetOpDesc();
  663. if (op_desc != nullptr) {
  664. int64_t stream_id = op_desc->GetStreamId();
  665. if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
  666. op_desc->SetStreamId(old_to_new_streams[stream_id]);
  667. }
  668. }
  669. }
  670. }
  671. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户对此并无感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。