You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-'), and can be up to 35 characters long.

dynamic_shape_partition.cc 37 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/partition/dynamic_shape_partition.h"
  17. #include <algorithm>
  18. #include <iostream>
  19. #include <memory>
  20. #include <queue>
  21. #include <sstream>
  22. #include <string>
  23. #include <unordered_set>
  24. #include <vector>
  25. #include "common/ge/ge_util.h"
  26. #include "framework/common/debug/ge_log.h"
  27. #include "framework/common/types.h"
  28. #include "graph/debug/ge_attr_define.h"
  29. #include "graph/utils/graph_utils.h"
  30. #include "graph/utils/op_desc_utils.h"
// Guard macro: when `cond` is false, logs an error with the
// "[Dynamic shape partition]" prefix and returns FAILED from the enclosing
// function. The do/while(0) wrapper makes it statement-safe.
#define REQUIRE(cond, ...) \
  do { \
    if (!(cond)) { \
      GELOGE(FAILED, "[Dynamic shape partition]" __VA_ARGS__); \
      return FAILED; \
    } \
  } while (0)
// Convenience wrappers for the common pointer / Status / graphStatus checks.
#define REQUIRE_NOT_NULL(cond, ...) REQUIRE(((cond) != nullptr), __VA_ARGS__)
#define REQUIRE_SUCCESS(cond, ...) REQUIRE(((cond) == SUCCESS), __VA_ARGS__)
#define REQUIRE_GRAPH_SUCCESS(cond, ...) REQUIRE(((cond) == GRAPH_SUCCESS), __VA_ARGS__)
  41. bool IsExperimental() {
  42. const static bool kIsExperimental = (std::getenv("EXPERIMENTAL_DYNAMIC_PARTITION") != nullptr);
  43. return kIsExperimental;
  44. }
  45. namespace ge {
  46. using Cluster = DynamicShapePartitioner::Cluster;
  47. using ClusterPtr = std::shared_ptr<Cluster>;
// Entry point: partitions the root graph into known-/unknown-shape subgraphs.
// Only active when EXPERIMENTAL_DYNAMIC_PARTITION is set; otherwise (or when
// the graph has no unknown-shape node) it just records a false
// ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED flag and returns SUCCESS.
Status DynamicShapePartitioner::Partition() {
  REQUIRE_NOT_NULL(root_graph_, "Graph is nullptr.");
  if (!IsExperimental()) {
    GELOGD("Skip dynamic shape partition as not in experimental mode.");
    REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false),
            "Failed set dynamic shape partitioned flag on root graph.");
    return SUCCESS;
  }
  GELOGD("Start dynamic shape partition graph %s.", root_graph_->GetName().c_str());
  REQUIRE_SUCCESS(MarkUnknownShapeNodes(), "Failed mark unknown shape nodes, root grah name:%s.",
                  root_graph_->GetName().c_str());
  if (unknown_shape_nodes_.empty()) {
    // Whole graph is static-shape: nothing to partition.
    GELOGD("Skip dynamic shape partition of graph %s as all nodes are known shape.", root_graph_->GetName().c_str());
    REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, false),
            "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
    return SUCCESS;
  }
  REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
          "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
  // Dump before/after snapshots so a failed partition can be diffed offline.
  DumpGraph("_Before_DSP");
  auto status = PartitionImpl();
  GELOGD("%s.", DebugString().c_str());
  if (status != SUCCESS) {
    GELOGE(status, "Failed dynamic shape partition graph: %s, status:\n %s", root_graph_->GetName().c_str(),
           DebugString().c_str());
  }
  DumpGraph("_After_DSP");
  GELOGD("Finish dynamic shape partition graph %s.", root_graph_->GetName().c_str());
  // Intermediate state is released on both the success and failure paths.
  ClearResource();
  return status;
}
// The actual partition pipeline, run after all precondition checks:
// topo-sort nodes -> one cluster per node -> merge clusters -> dedupe/sort ->
// build PartitionedCall frames -> combine frames -> populate subgraphs.
Status DynamicShapePartitioner::PartitionImpl() {
  REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed.");
  REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes.");
  REQUIRE_SUCCESS(MergeClusters(), "Failed merge clusters.");
  PruneUniqueClusters();
  REQUIRE_SUCCESS(BuildPartitionFrame(), "Failed build cluster partition frame.");
  REQUIRE_SUCCESS(CombinePartitionFrame(), "Failed combine cluster partition frame.");
  REQUIRE_SUCCESS(BuildPartitionSubgraph(), "Failed build cluster partition subgraph.");
  return SUCCESS;
}
  89. void DynamicShapePartitioner::PruneUniqueClusters() {
  90. for (auto &node : root_graph_->GetDirectNode()) {
  91. auto cluster = node_2_cluster_[node];
  92. if (unique_clusters_.count(cluster) != 0) {
  93. continue;
  94. }
  95. if (unique_clusters_.insert(cluster).second) {
  96. sorted_unique_clusters_.emplace_back(cluster);
  97. }
  98. }
  99. auto comp_func = [](std::shared_ptr<Cluster> clu_a, std::shared_ptr<Cluster> clu_b) -> bool {
  100. return clu_a->Id() < clu_b->Id();
  101. };
  102. std::sort(sorted_unique_clusters_.begin(), sorted_unique_clusters_.end(), comp_func);
  103. }
  104. Status DynamicShapePartitioner::BuildPartitionFrame() {
  105. for (const auto &cluster : sorted_unique_clusters_) {
  106. REQUIRE_SUCCESS(cluster->BuildFrame(), "Failed build frame of cluster[%lu].", cluster->Id());
  107. }
  108. return SUCCESS;
  109. }
  110. Status DynamicShapePartitioner::CombinePartitionFrame() {
  111. for (const auto &cluster : sorted_unique_clusters_) {
  112. REQUIRE_SUCCESS(cluster->CombinePartitionFrame(), "Failed combine frame of cluster[%lu].", cluster->Id());
  113. }
  114. return SUCCESS;
  115. }
  116. Status DynamicShapePartitioner::BuildPartitionSubgraph() {
  117. for (const auto &cluster : sorted_unique_clusters_) {
  118. REQUIRE_SUCCESS(cluster->BuildPartitionSubgraph(), "Failed build subgraph of cluster[%lu].", cluster->Id());
  119. }
  120. return SUCCESS;
  121. }
// Human-readable summary for debug logs: every node marked unknown-shape,
// per-type cluster counts, then each cluster's own DebugString().
std::string DynamicShapePartitioner::DebugString() const {
  size_t unknown = 0;
  size_t known = 0;
  size_t data = 0;
  size_t netoutput = 0;
  size_t is_inputnode = 0;
  std::stringstream ss;
  ss << "All unknown shape nodes:" << std::endl;
  for (const auto &node : unknown_shape_nodes_) {
    ss << " [" << node->GetName() << "](" << node->GetType() << ")" << std::endl;
  }
  // Tally clusters by type; the five categories are mutually exclusive.
  for (const auto &cluster : unique_clusters_) {
    if (cluster->IsUnknownShape()) {
      unknown++;
    } else if (cluster->IsKnownShape()) {
      known++;
    } else if (cluster->IsData()) {
      data++;
    } else if (cluster->IsNetOutput()) {
      netoutput++;
    } else if (cluster->IsInputNode()) {
      is_inputnode++;
    }
  }
  ss << "All clusters:" << unique_clusters_.size() << ", data:" << data << ", known:" << known
     << ", unknown:" << unknown << ", netoutput:" << netoutput << ", is_inputnode:" << is_inputnode << std::endl;
  for (const auto &cluster : unique_clusters_) {
    ss << " " << cluster->DebugString() << std::endl;
  }
  return ss.str();
}
  153. void DynamicShapePartitioner::DumpGraph(const std::string &suffix) {
  154. GraphUtils::DumpGEGraphToOnnx(*root_graph_, root_graph_->GetName() + suffix);
  155. for (const auto &sub_graph : root_graph_->GetAllSubgraphs()) {
  156. GraphUtils::DumpGEGraphToOnnx(*sub_graph, sub_graph->GetName() + suffix);
  157. }
  158. }
  159. void DynamicShapePartitioner::ClearResource() {
  160. for (const auto &cluster : unique_clusters_) {
  161. cluster->Clear();
  162. }
  163. node_2_cluster_.clear();
  164. ordered_cluster_.clear();
  165. unique_clusters_.clear();
  166. sorted_unique_clusters_.clear();
  167. unknown_shape_nodes_.clear();
  168. root_graph_.reset();
  169. }
// Walks every direct node of the root graph and fills unknown_shape_nodes_
// (each node may also mark its direct data-edge peers — see
// CollectSpreadUnknownShapeNodes).
Status DynamicShapePartitioner::MarkUnknownShapeNodes() {
  for (auto &node : root_graph_->GetDirectNode()) {
    REQUIRE_SUCCESS(CollectSpreadUnknownShapeNodes(node), "Failed collect spread unknown shape nodes %s.",
                    node->GetName().c_str());
  }
  return SUCCESS;
}
// Creates exactly one Cluster per direct node, classified as DATA /
// INPUT_NODE (in-degree-0 constant) / NETOUTPUT / UNKNOWN_SHAPE / KNOWN_SHAPE,
// and mirrors the node edges as cluster-level input links. Relies on the
// graph having been topologically sorted beforehand (see PartitionImpl).
Status DynamicShapePartitioner::InitClusters() {
  auto graph = root_graph_;
  size_t rank = 0;
  for (const auto &node : graph->GetDirectNode()) {
    Cluster::Type type = Cluster::DATA;
    // A constant with no inputs is treated as a graph input rather than as
    // known-shape compute.
    bool is_input = ((node->GetType() == CONSTANT) || (node->GetType() == CONSTANTOP)) && node->GetInNodes().empty();
    if (node->GetType() == DATA) {
      type = Cluster::DATA;
    } else if (is_input) {
      type = Cluster::INPUT_NODE;
    } else if (node->GetType() == NETOUTPUT) {
      type = Cluster::NETOUTPUT;
    } else if (unknown_shape_nodes_.count(node) > 0) {
      type = Cluster::UNKNOWN_SHAPE;
    } else {
      type = Cluster::KNOWN_SHAPE;
    }
    // rank doubles as the creation-order id of the cluster.
    auto cluster = MakeShared<Cluster>(rank++, type, node, this);
    REQUIRE_NOT_NULL(cluster, "Failed new memory for cluster.");
    node_2_cluster_[node] = cluster;
    if (cluster->IsUnknownShape()) {
      ordered_cluster_.push_back(cluster);
    }
    // Already sorted topologically, so access to the parent cluster is safe
    for (const auto &parent : node->GetInAllNodes()) {
      cluster->AddInput(node_2_cluster_[parent]);
    }
  }
  for (const auto &node : graph->GetDirectNode()) {
    GELOGD("Make cluster for node %s : %s.", node->GetName().c_str(), node_2_cluster_[node]->DebugString().c_str());
  }
  return SUCCESS;
}
// Kahn-style BFS topological sort over the cluster graph. Every reached
// cluster gets a fresh rank (UpdateRank collapses its [min_, max_] interval
// to that rank); ordered_cluster_ is rebuilt with only KNOWN_SHAPE and
// INPUT_NODE clusters in topological order. Returns FAILED when not all
// clusters were reached, i.e. the cluster graph contains a cycle.
Status DynamicShapePartitioner::TopologicalSortClusters() {
  ordered_cluster_.clear();
  // BFS topological sort clusters for known shape cluster
  std::queue<ClusterPtr> ready_clusters;
  std::unordered_map<ClusterPtr, size_t> cluster_pending_count;
  std::unordered_set<ClusterPtr> seen_clusters;
  // Seed: visit each cluster once (many nodes share a cluster) and record its
  // in-degree; zero-in-degree clusters are immediately ready.
  for (auto &node : root_graph_->GetDirectNode()) {
    auto &cluster = node_2_cluster_[node];
    if (seen_clusters.count(cluster) != 0) {
      continue;
    }
    seen_clusters.insert(cluster);
    auto pending_count = cluster->Inputs().size();
    if (pending_count == 0) {
      ready_clusters.push(cluster);
    } else {
      cluster_pending_count[cluster] = pending_count;
    }
  }
  size_t rank = 0;
  while (!ready_clusters.empty()) {
    auto cluster = ready_clusters.front();
    ready_clusters.pop();
    cluster->UpdateRank(rank++);
    if (cluster->IsKnownShape() || cluster->IsInputNode()) {
      ordered_cluster_.push_back(cluster);
    }
    for (const auto &out_cluster : cluster->Outputs()) {
      // The > 0 guard avoids underflow for zero-in-degree successors that
      // never entered cluster_pending_count.
      if (cluster_pending_count[out_cluster] > 0 && --cluster_pending_count[out_cluster] == 0) {
        ready_clusters.push(out_cluster);
      }
    }
  }
  // rank counts processed clusters; a shortfall means a cycle blocked the sort.
  if (rank != seen_clusters.size()) {
    return FAILED;
  }
  return SUCCESS;
}
  248. namespace {
  249. static std::string ToString(const std::vector<ClusterPtr> &clusters) {
  250. if (clusters.empty()) {
  251. return "()";
  252. }
  253. std::stringstream ss;
  254. ss << "(";
  255. auto iter = clusters.begin();
  256. for (size_t i = 0; i < clusters.size() - 1; i++) {
  257. ss << (*iter)->Id() << ",";
  258. iter++;
  259. }
  260. ss << (*iter)->Id() << ").";
  261. return ss.str();
  262. }
  263. }
// Merges every unknown-shape producer cluster into its consumer, together
// with all clusters on any path between the two (MergeAllPathFrom), keeping
// the cluster graph acyclic.
void DynamicShapePartitioner::MergeClustersUnknownShape() {
  // Merge unknown shape clusters
  for (const auto &cluster : ordered_cluster_) {
    // Inputs() returns a copy, so merging while iterating is safe.
    for (const auto &in_cluster : cluster->Inputs()) {
      if (!in_cluster->IsUnknownShape()) {
        continue;
      }
      auto merged_clusters = cluster->MergeAllPathFrom(in_cluster);
      GELOGD("Merge all path cluster from %lu to %lu %s.", in_cluster->Id(), cluster->Id(),
             ToString(merged_clusters).c_str());
      // Re-point every node of an absorbed cluster at the surviving cluster.
      for (const auto &merged_cluster : merged_clusters) {
        for (const auto &node : merged_cluster->Nodes()) {
          node_2_cluster_[node] = cluster;
        }
      }
    }
  }
}
// Greedily merges adjacent known-shape clusters; TryMerge rejects merges
// that would create a cycle. A single-input ref-variable cluster is folded
// into its producer unconditionally.
void DynamicShapePartitioner::MergeClustersKnownShape() {
  // Merge known shape clusters
  for (const auto &cluster : ordered_cluster_) {
    if (cluster->IsRefVariable() && cluster->Inputs().size() == 1) {
      auto in_cluster = *(cluster->Inputs().begin());
      in_cluster->Merge(cluster);
      // Ref-variable clusters hold exactly one node (see IsRefVariable).
      node_2_cluster_[*(cluster->Nodes().begin())] = in_cluster;
      continue;
    }
    // Inputs() returns a copy, so merging while iterating is safe.
    for (const auto &in_cluster : cluster->Inputs()) {
      if (!in_cluster->IsKnownShape()) {
        continue;
      }
      if (cluster->TryMerge(in_cluster)) {
        GELOGD("Success merge known shape cluster from %lu to %lu.", in_cluster->Id(), cluster->Id());
        for (const auto &node : in_cluster->Nodes()) {
          node_2_cluster_[node] = cluster;
        }
      }
    }
  }
}
  304. void DynamicShapePartitioner::MergeClustersInputData() {
  305. // Merge input clusters
  306. std::shared_ptr<Cluster> cluster_pre = nullptr;
  307. for (const auto &cluster : ordered_cluster_) {
  308. if (!cluster->IsInputNode()) {
  309. continue;
  310. }
  311. if (cluster_pre != nullptr) {
  312. cluster_pre->Merge(cluster);
  313. } else {
  314. cluster_pre = cluster;
  315. }
  316. GELOGD("Success merge input node cluster from %lu to %lu.", cluster->Id(), cluster->Id());
  317. for (const auto &node : cluster->Nodes()) {
  318. node_2_cluster_[node] = cluster_pre;
  319. }
  320. }
  321. }
// Merge order matters: unknown-shape merges first reshape the cluster graph,
// then a re-sort refreshes cluster ranks and rebuilds ordered_cluster_ before
// the known-shape and input-data merges consume it.
Status DynamicShapePartitioner::MergeClusters() {
  MergeClustersUnknownShape();
  REQUIRE_SUCCESS(TopologicalSortClusters(), "Failed topological sort clusters after merge unknown shape clusters.");
  MergeClustersKnownShape();
  MergeClustersInputData();
  return SUCCESS;
}
  329. bool DynamicShapePartitioner::JudgeUnknowShapeWithAttr(const OpDescPtr &opdesc) {
  330. bool is_forced_unknown = false;
  331. if (AttrUtils::GetBool(opdesc, ATTR_NAME_IS_UNKNOWN_SHAPE, is_forced_unknown) && is_forced_unknown) {
  332. GELOGD("Collect node %s as unknown as it was marked unknown forcibly.", opdesc->GetName().c_str());
  333. return true;
  334. }
  335. bool forced_unknown = false;
  336. if (AttrUtils::GetBool(opdesc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, forced_unknown) && forced_unknown) {
  337. GELOGD("Collect node %s as unknown as it was marked force unknown node forcibly.", opdesc->GetName().c_str());
  338. return true;
  339. }
  340. return false;
  341. }
// Decides whether `node` is unknown-shape and records it in
// unknown_shape_nodes_. Unknown-ness "spreads" one hop: each unknown output
// anchor also marks its consumers, each unknown input anchor also marks its
// producer. A node with no unknown tensor is still unknown if any of its
// subgraphs contains an unknown-shape node. Already-collected nodes are
// skipped.
Status DynamicShapePartitioner::CollectSpreadUnknownShapeNodes(NodePtr node) {
  if (unknown_shape_nodes_.count(node) > 0) {
    return SUCCESS;
  }
  auto opdesc = node->GetOpDesc();
  REQUIRE_NOT_NULL(opdesc, "Opdesc is nullptr.");
  // One can set 'ATTR_NAME_IS_UNKNOWN_SHAPE=true' on node so as to forcing the node flow into the unknown subgraph,
  // ignore the actual shape.
  if (JudgeUnknowShapeWithAttr(opdesc)) {
    unknown_shape_nodes_.insert(node);
    return SUCCESS;
  }
  size_t anchor_index = 0;
  bool is_unknown = false;
  // Outputs: an unknown output tensor marks this node plus every consumer of
  // that anchor.
  for (auto &out_tensor : opdesc->GetAllOutputsDesc()) {
    if (IsUnknownShapeTensor(out_tensor)) {
      GELOGD("Collect node %s as unknown as output %lu is unknown.", node->GetName().c_str(), anchor_index);
      is_unknown = true;
      auto anchor = node->GetOutDataAnchor(static_cast<int>(anchor_index));
      for (const auto peer_anchor : anchor->GetPeerInDataAnchors()) {
        if (peer_anchor != nullptr) {
          GELOGD("Collect node %s as has unknown input from %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(),
                 node->GetName().c_str(), anchor_index);
          unknown_shape_nodes_.insert(peer_anchor->GetOwnerNode());
        }
      }
    }
    anchor_index++;
  }
  anchor_index = 0;
  // Inputs: an unknown input tensor marks this node plus the producer feeding
  // that anchor.
  for (auto &in_tensor : opdesc->GetAllInputsDesc()) {
    if (IsUnknownShapeTensor(in_tensor)) {
      GELOGD("Collect node %s as unknown as input %lu is unknown.", node->GetName().c_str(), anchor_index);
      is_unknown = true;
      auto anchor = node->GetInDataAnchor(static_cast<int>(anchor_index));
      const auto peer_anchor = anchor->GetPeerOutAnchor();
      if (peer_anchor != nullptr) {
        GELOGD("Collect node %s as has unknown output to %s:%lu.", peer_anchor->GetOwnerNode()->GetName().c_str(),
               node->GetName().c_str(), anchor_index);
        unknown_shape_nodes_.insert(peer_anchor->GetOwnerNode());
      }
    }
    anchor_index++;
  }
  if (is_unknown) {
    unknown_shape_nodes_.insert(node);
  } else {
    // No unknown tensor on the node itself: check its subgraphs (if any);
    // one unknown subgraph marks the whole node.
    auto graph = root_graph_;
    for (const auto &subgraph_name : opdesc->GetSubgraphInstanceNames()) {
      auto subgraph = graph->GetSubgraph(subgraph_name);
      REQUIRE_NOT_NULL(subgraph, "Failed get subgraph %s of node %s on root graph.", subgraph_name.c_str(),
                       node->GetName().c_str());
      bool is_graph_unknow = false;
      REQUIRE_SUCCESS(IsUnknownShapeGraph(subgraph, is_graph_unknow), "Failed check subgraph %s shape of node %s.",
                      subgraph_name.c_str(), node->GetName().c_str());
      if (is_graph_unknow) {
        GELOGD("Collect node %s as its subgraph %s is unknown.", node->GetName().c_str(), subgraph->GetName().c_str());
        unknown_shape_nodes_.insert(node);
        break;
      }
    }
  }
  return SUCCESS;
}
// Sets is_unknown to true when any output tensor, any input tensor, or any
// owned subgraph of `node` is unknown-shape; false otherwise. The out-param
// is only meaningful when SUCCESS is returned.
Status DynamicShapePartitioner::IsUnknownShapeNode(NodePtr node, bool &is_unknown) {
  auto opdesc = node->GetOpDesc();
  auto graph = root_graph_;
  for (auto &out_tensor : opdesc->GetAllOutputsDesc()) {
    if (IsUnknownShapeTensor(out_tensor)) {
      GELOGD("Mark node %s unknown as unknown output.", node->GetName().c_str());
      is_unknown = true;
      return SUCCESS;
    }
  }
  for (auto &in_tensor : opdesc->GetAllInputsDesc()) {
    if (IsUnknownShapeTensor(in_tensor)) {
      GELOGD("Mark node %s unknown as unknown intput.", node->GetName().c_str());
      is_unknown = true;
      return SUCCESS;
    }
  }
  // Recurse into subgraphs (e.g. of control-flow ops); IsUnknownShapeGraph
  // writes the answer into is_unknown directly.
  for (auto &subgraph_name : opdesc->GetSubgraphInstanceNames()) {
    auto subgraph = graph->GetSubgraph(subgraph_name);
    REQUIRE_NOT_NULL(subgraph, "Failed get subgraph %s of node %s on root graph.", subgraph_name.c_str(),
                     node->GetName().c_str());
    REQUIRE_SUCCESS(IsUnknownShapeGraph(subgraph, is_unknown), "Failed check subgraph %s shape of node %s.",
                    subgraph_name.c_str(), node->GetName().c_str());
    if (is_unknown) {
      GELOGD("Mark node %s unknown as unknown subgraph.", node->GetName().c_str());
      return SUCCESS;
    }
  }
  is_unknown = false;
  return SUCCESS;
}
// A graph is unknown-shape iff any of its direct nodes is unknown-shape;
// stops scanning at the first unknown node.
Status DynamicShapePartitioner::IsUnknownShapeGraph(ComputeGraphPtr graph, bool &is_unknown) {
  for (auto &node : graph->GetDirectNode()) {
    REQUIRE_SUCCESS(IsUnknownShapeNode(node, is_unknown), "Failed check node %s shape on graph %s.",
                    node->GetName().c_str(), graph->GetName().c_str());
    if (is_unknown) {
      GELOGD("Mark graph %s unknown as contains unknown node %s.", graph->GetName().c_str(), node->GetName().c_str());
      return SUCCESS;
    }
  }
  return SUCCESS;
}
// A tensor is dynamic when any shape entry is -1 (unknown dimension) or -2
// (unknown-rank marker).
bool DynamicShapePartitioner::IsUnknownShapeTensor(const GeTensorDesc &tensor) {
  const static int kUnknowShape = -1;
  const static int kUnknowRank = -2;
  for (auto dim_size : tensor.GetShape().GetDims()) {
    if (dim_size == kUnknowShape || dim_size == kUnknowRank) {
      return true;
    }
  }
  return false;
}
// One-line dump of a cluster: type tag, id, node count, rank interval
// (min_, max_), input/output cluster ids, then member node names.
std::string Cluster::DebugString() const {
  std::stringstream ss;
  switch (type_) {
    case DATA:
      ss << "DATA";
      break;
    case INPUT_NODE:
      ss << "INPUT_NODE";
      break;
    case NETOUTPUT:
      ss << "NETOUTPUT";
      break;
    case UNKNOWN_SHAPE:
      ss << "UNKNOW";
      break;
    case KNOWN_SHAPE:
      ss << "KNOW";
      break;
  }
  ss << "[" << id_ << "](size:" << nodes_.size() << ")";
  ss << "(" << min_ << "," << max_ << ")(";
  for (const auto &cluster : in_clusters_) {
    ss << cluster->id_ << ",";
  }
  ss << ")->(";
  for (const auto &cluster : out_clusters_) {
    ss << cluster->id_ << ",";
  }
  ss << ")|";
  for (const auto &node : nodes_) {
    ss << (node->GetName() + "|");
  }
  return ss.str();
}
// Creation-order id assigned in InitClusters.
size_t Cluster::Id() const { return id_; }
// Collapses the cluster's topological interval [min_, max_] to a single rank.
void Cluster::UpdateRank(size_t rank) {
  max_ = rank;
  min_ = rank;
};
// Type predicates for the five mutually exclusive cluster categories.
bool Cluster::IsData() const { return type_ == DATA; };
bool Cluster::IsKnownShape() const { return type_ == KNOWN_SHAPE; };
bool Cluster::IsUnknownShape() const { return type_ == UNKNOWN_SHAPE; };
bool Cluster::IsNetOutput() const { return type_ == NETOUTPUT; };
bool Cluster::IsInputNode() const { return type_ == INPUT_NODE; };
  502. bool Cluster::IsRefVariable() const {
  503. if ((nodes_.size() == 1) && ((nodes_[0]->GetType() == VARIABLE) || (nodes_[0]->GetType() == VARIABLEV2))) {
  504. std::string ref_variable_name;
  505. return (AttrUtils::GetStr(nodes_[0]->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_variable_name) &&
  506. !ref_variable_name.empty());
  507. }
  508. return false;
  509. }
  510. void Cluster::AddInput(ClusterPtr in) {
  511. if (std::find(in_clusters_.begin(), in_clusters_.end(), in) != in_clusters_.end()) return;
  512. in_clusters_.insert(in_clusters_.end(), in);
  513. if (std::find(in->out_clusters_.begin(), in->out_clusters_.end(), shared_from_this()) != in->out_clusters_.end())
  514. return;
  515. in->out_clusters_.insert(in->out_clusters_.end(), shared_from_this());
  516. };
  517. void Cluster::RemoveInput(ClusterPtr in) {
  518. in_clusters_.erase(std::remove(in_clusters_.begin(), in_clusters_.end(), in), in_clusters_.end());
  519. in->out_clusters_.erase(std::remove(in->out_clusters_.begin(), in->out_clusters_.end(), shared_from_this()),
  520. in->out_clusters_.end());
  521. };
  522. void Cluster::AddOutput(ClusterPtr out) {
  523. if (std::find(out_clusters_.begin(), out_clusters_.end(), out) != out_clusters_.end()) return;
  524. out_clusters_.insert(out_clusters_.end(), out);
  525. if (std::find(out->in_clusters_.begin(), out->in_clusters_.end(), shared_from_this()) != out->in_clusters_.end())
  526. return;
  527. out->in_clusters_.insert(out->in_clusters_.end(), shared_from_this());
  528. };
  529. void Cluster::RemoveOutput(ClusterPtr out) {
  530. out_clusters_.erase(std::remove(out_clusters_.begin(), out_clusters_.end(), out), out_clusters_.end());
  531. out->in_clusters_.erase(std::remove(out->in_clusters_.begin(), out->in_clusters_.end(), shared_from_this()),
  532. out->in_clusters_.end());
  533. };
// Absorbs `other` into this cluster: takes over its nodes, splices its
// adjacency into this cluster's lists (dropping the direct edges between the
// two), and widens this cluster's [min_, max_] rank interval to cover other's.
// Statement order is significant: the self<->other cross-references must be
// removed before re-linking other's neighbors, and the neighbor lists are
// copied because RemoveOutput/RemoveInput mutate them while we iterate.
void Cluster::Merge(ClusterPtr other) {
  nodes_.insert(nodes_.end(), other->nodes_.begin(), other->nodes_.end());
  other->in_clusters_.erase(std::remove(other->in_clusters_.begin(), other->in_clusters_.end(), shared_from_this()),
                            other->in_clusters_.end());
  other->out_clusters_.erase(std::remove(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()),
                             other->out_clusters_.end());
  in_clusters_.erase(std::remove(in_clusters_.begin(), in_clusters_.end(), other), in_clusters_.end());
  out_clusters_.erase(std::remove(out_clusters_.begin(), out_clusters_.end(), other), out_clusters_.end());
  // Re-point all of other's remaining neighbors at this cluster.
  auto in_clusters = other->in_clusters_;
  for (const auto &cluster : in_clusters) {
    cluster->RemoveOutput(other);
    cluster->AddOutput(shared_from_this());
  }
  auto out_clusters = other->out_clusters_;
  for (const auto &cluster : out_clusters) {
    cluster->RemoveInput(other);
    cluster->AddInput(shared_from_this());
  }
  if (other->max_ > max_) {
    max_ = other->max_;
  }
  if (other->min_ < min_) {
    min_ = other->min_;
  }
};
// Merges `other` into this cluster only when that cannot create a cycle.
// BFS forward from `other`: if a cluster with this cluster's rank (max_) is
// reachable through some intermediate cluster (current != other), a second
// path other->...->this exists besides the direct edge, so the merge is
// refused. Returns true when the merge was performed.
bool Cluster::TryMerge(ClusterPtr other) {
  std::queue<ClusterPtr> forward_reached;
  forward_reached.push(other);
  while (!forward_reached.empty()) {
    auto current_cluster = forward_reached.front();
    forward_reached.pop();
    for (const auto &cluster : current_cluster->out_clusters_) {
      if (cluster->max_ == max_ && current_cluster != other) {
        return false;
      } else if (cluster->min_ < max_) {
        // Prune: only clusters ranked before this one can lie on a path into it.
        forward_reached.push(cluster);
      }
    }
  }
  Merge(other);
  return true;
};
// Merges `other` plus every cluster lying on some path from `other` to this
// cluster into this cluster, and returns the absorbed clusters. Returns an
// empty vector (merging nothing) when there is no direct edge other -> this.
// Path membership = reachable forward from `other` AND backward from this;
// the [min_, max_] rank intervals prune both searches.
std::vector<ClusterPtr> Cluster::MergeAllPathFrom(ClusterPtr other) {
  std::queue<ClusterPtr> forward_reached_queue;
  std::queue<ClusterPtr> backward_reached_queue;
  std::unordered_set<ClusterPtr> forward_reached_clusters;
  std::unordered_set<ClusterPtr> backward_reached_clusters;
  std::vector<ClusterPtr> path_clusters;
  if (std::find(other->out_clusters_.begin(), other->out_clusters_.end(), shared_from_this()) ==
      other->out_clusters_.end()) {
    return path_clusters;
  }
  path_clusters.push_back(other);
  forward_reached_queue.push(other);
  backward_reached_queue.push(shared_from_this());
  // Pass 1: everything reachable forward from `other` below this cluster's rank.
  while (!forward_reached_queue.empty()) {
    auto current_cluster = forward_reached_queue.front();
    forward_reached_queue.pop();
    for (const auto &cluster : current_cluster->out_clusters_) {
      if (cluster->min_ < max_ && cluster->max_ != max_ && forward_reached_clusters.count(cluster) == 0) {
        forward_reached_clusters.insert(cluster);
        forward_reached_queue.push(cluster);
      }
    }
  }
  // Pass 2: walk backward from this cluster; the intersection with pass 1
  // is exactly the set of clusters on some other->this path.
  while (!backward_reached_queue.empty()) {
    auto current_cluster = backward_reached_queue.front();
    backward_reached_queue.pop();
    for (const auto &cluster : current_cluster->in_clusters_) {
      if (cluster->max_ > other->min_ && cluster->max_ != other->max_ &&
          backward_reached_clusters.count(cluster) == 0) {
        backward_reached_clusters.insert(cluster);
        backward_reached_queue.push(cluster);
        if (forward_reached_clusters.count(cluster) != 0) {
          path_clusters.push_back(cluster);
        }
      }
    }
  }
  for (const auto &cluster : path_clusters) {
    Merge(cluster);
  }
  return path_clusters;
}
// Adjacency/membership accessors. They return copies, which lets callers
// merge clusters while iterating the returned vectors.
std::vector<ClusterPtr> Cluster::Inputs() const { return in_clusters_; };
std::vector<ClusterPtr> Cluster::Outputs() const { return out_clusters_; };
std::vector<NodePtr> Cluster::Nodes() const { return nodes_; };
// Appends `anchor` as the next frame input; inputs_index_ remembers its slot
// so GetFrameInDataAnchor can map it to the partition node's anchor later.
void Cluster::AddFrameInput(InDataAnchorPtr anchor) {
  inputs_index_[anchor] = inputs_.size();
  inputs_.push_back(anchor);
};
// Mirror of AddFrameInput for frame outputs.
void Cluster::AddFrameOutput(OutDataAnchorPtr anchor) {
  outputs_index_[anchor] = outputs_.size();
  outputs_.push_back(anchor);
};
// Maps an original in-anchor to the matching anchor on partition_node_.
InDataAnchorPtr Cluster::GetFrameInDataAnchor(InDataAnchorPtr anchor) {
  return partition_node_->GetInDataAnchor(static_cast<int>(inputs_index_[anchor]));
};
// Maps an original out-anchor to the matching anchor on partition_node_.
OutDataAnchorPtr Cluster::GetFrameOutDataAnchor(OutDataAnchorPtr anchor) {
  return partition_node_->GetOutDataAnchor(static_cast<int>(outputs_index_[anchor]));
};
// Control anchors of the frame's partition node.
InControlAnchorPtr Cluster::GetFrameInControlAnchor() { return partition_node_->GetInControlAnchor(); };
OutControlAnchorPtr Cluster::GetFrameOutControlAnchor() { return partition_node_->GetOutControlAnchor(); };
// Builds this cluster's frame. Compute clusters (unknown/known/input-node)
// get a full PartitionedCall frame; DATA/NETOUTPUT clusters keep their single
// node as the partition node and only record frame anchors and cut
// cross-cluster control edges.
Status Cluster::BuildFrame() {
  if (IsUnknownShape() || IsKnownShape() || IsInputNode()) {
    return BuildPartitionFrame();
  } else {
    auto node = nodes_.front();
    auto in_control_anchor = node->GetInControlAnchor();
    if (in_control_anchor != nullptr) {
      // Cross-cluster control edges are removed here and recorded in
      // control_inputs_/control_outputs_ — presumably re-wired at the frame
      // level by a later phase (CombinePartitionFrame); confirm there.
      for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
        auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()];
        if (src_cluster->id_ != id_) {
          REQUIRE_GRAPH_SUCCESS(
              GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor),
              "Failed remove edge from node %s index %d to node %s index %d.",
              peer_out_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(peer_out_control_anchor),
              in_control_anchor->GetOwnerNode()->GetName().c_str(), AnchorUtils::GetIdx(in_control_anchor));
          control_inputs_.insert(src_cluster);
          src_cluster->control_outputs_.insert(peer_out_control_anchor);
        }
      }
    }
    if (IsData()) {
      // A Data node exposes its outputs to the frame; NetOutput exposes inputs.
      for (const auto &anchor : node->GetAllOutDataAnchors()) {
        AddFrameOutput(anchor);
      }
    } else {
      for (const auto &anchor : node->GetAllInDataAnchors()) {
        AddFrameInput(anchor);
      }
    }
    partition_node_ = node;
  }
  return SUCCESS;
}
// Moves every node of this cluster out of the root graph into a new
// subgraph, called from the root graph through a PartitionedCall node (the
// cluster's "frame"). Cross-cluster data edges become frame inputs/outputs
// (with matching descs on the PartitionedCall op); cross-cluster control
// edges are cut and recorded for CombinePartitionFrame(); the session graph
// id is propagated onto the new subgraph.
Status Cluster::BuildPartitionFrame() {
  auto graph = partitioner_->root_graph_;
  bool is_unknown_shape = IsUnknownShape();
  bool is_input = IsInputNode();
  // NOTE(review): "_unknow"/"_know" look like typos for "_unknown"/"_known",
  // but they are baked into generated graph names — renaming could break
  // anything matching on these names, so they are left as-is.
  string known_name = (is_unknown_shape ? "_unknow" : "_know");
  string sub_graph_name_patten = (is_input ? "_input" : known_name);
  std::string sub_graph_name = graph->GetName() + "_sub_" + std::to_string(unique_id_) + sub_graph_name_patten;
  subgraph_ = MakeShared<ComputeGraph>(sub_graph_name);
  REQUIRE_NOT_NULL(subgraph_, "Failed new memory for subgraph.");
  // unique_id_ is consumed here (post-increment) so the next cluster built
  // on this thread gets a fresh id for both names.
  auto partition_op = MakeShared<OpDesc>("PartitionedCall_" + std::to_string(unique_id_++), "PartitionedCall");
  REQUIRE_NOT_NULL(partition_op, "Failed new memory for partition op.");
  REQUIRE(AttrUtils::SetBool(partition_op, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape),
          "Failed set _is_unknown_shape flag on partitioned op %s.", partition_op->GetName().c_str());
  REQUIRE_GRAPH_SUCCESS(partition_op->AddSubgraphName(subgraph_->GetName()), "Failed add subgraph name.");
  REQUIRE_GRAPH_SUCCESS(partition_op->SetSubgraphInstanceName(0, subgraph_->GetName()),
                        "Failed set subgraph instance name.");
  for (auto &node : nodes_) {
    // Re-home the node: add to subgraph, detach from root graph topology,
    // update its owner. Order matters — the node must stay reachable.
    REQUIRE_NOT_NULL(subgraph_->AddNode(node), "Failed add node to subgraph.");
    REQUIRE(AttrUtils::SetBool(node->GetOpDesc(), ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape),
            "Failed set shape flag.");
    REQUIRE_GRAPH_SUCCESS(GraphUtils::RemoveJustNode(graph, node), "Failed remove root graph node.");
    REQUIRE_GRAPH_SUCCESS(node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph.");
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;  // Skip overhang input.
      }
      auto src_cluster = partitioner_->node_2_cluster_[peer_out_anchor->GetOwnerNode()];
      if (src_cluster->id_ != id_) {
        // Data edge from another cluster becomes a frame input; mirror the
        // consumer's input desc on the PartitionedCall op.
        AddFrameInput(anchor);
        REQUIRE_GRAPH_SUCCESS(partition_op->AddInputDesc(node->GetOpDesc()->GetInputDesc(anchor->GetIdx())),
                              "Failed add input desc.");
      }
    }
    auto in_control_anchor = node->GetInControlAnchor();
    if (in_control_anchor != nullptr) {
      for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
        if (peer_out_control_anchor == nullptr) {
          continue;
        }
        auto src_cluster = partitioner_->node_2_cluster_[peer_out_control_anchor->GetOwnerNode()];
        if (src_cluster->id_ != id_) {
          // Cut cross-cluster control edges; they are re-linked between
          // frame nodes by CombinePartitionFrame().
          REQUIRE_GRAPH_SUCCESS(
              GraphUtils::RemoveEdge(peer_out_control_anchor, in_control_anchor),
              "Failed remove edge from %s:%d to %s:%d.", peer_out_control_anchor->GetOwnerNode()->GetName().c_str(),
              peer_out_control_anchor->GetIdx(), node->GetName().c_str(), in_control_anchor->GetIdx());
          control_inputs_.insert(src_cluster);
          src_cluster->control_outputs_.insert(peer_out_control_anchor);
        }
      }
    }
    for (const auto &anchor : node->GetAllOutDataAnchors()) {
      auto peer_in_anchors = anchor->GetPeerInDataAnchors();
      for (const auto &peer_in_anchor : peer_in_anchors) {
        auto src_cluster = partitioner_->node_2_cluster_[peer_in_anchor->GetOwnerNode()];
        if (src_cluster->id_ != id_) {
          // At least one consumer lives in another cluster: expose this
          // output exactly once (hence the break).
          AddFrameOutput(anchor);
          REQUIRE_GRAPH_SUCCESS(partition_op->AddOutputDesc(node->GetOpDesc()->GetOutputDesc(anchor->GetIdx())),
                                "Failed add output desc.");
          break;
        }
      }
    }
  }
  partition_node_ = graph->AddNode(partition_op);
  REQUIRE_NOT_NULL(partition_node_, "Failed add partition node.");
  REQUIRE_GRAPH_SUCCESS(partition_node_->SetOwnerComputeGraph(graph), "Failed set owner graph.");
  subgraph_->SetParentNode(partition_node_);
  subgraph_->SetParentGraph(graph);
  REQUIRE_GRAPH_SUCCESS(graph->AddSubgraph(subgraph_), "Failed add subgraph to root graph.");
  std::string session_graph_id;
  REQUIRE(AttrUtils::GetStr(*graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id),
          "Failed get ATTR_NAME_SESSION_GRAPH_ID on root graph.");
  REQUIRE(AttrUtils::SetStr(*subgraph_, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id),
          "Failed set ATTR_NAME_SESSION_GRAPH_ID on subgraph.");
  return SUCCESS;
}
  747. Status Cluster::CombinePartitionFrame() {
  748. for (const auto &anchor : inputs_) {
  749. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  750. auto src_cluster = partitioner_->node_2_cluster_[peer_out_anchor->GetOwnerNode()];
  751. auto src_anchor = src_cluster->GetFrameOutDataAnchor(peer_out_anchor);
  752. auto dst_anchor = GetFrameInDataAnchor(anchor);
  753. REQUIRE_GRAPH_SUCCESS(GraphUtils::RemoveEdge(peer_out_anchor, anchor), "Failed remove edge from %s:%d to %s:%d.",
  754. peer_out_anchor->GetOwnerNode()->GetName().c_str(), peer_out_anchor->GetIdx(),
  755. anchor->GetOwnerNode()->GetName().c_str(), anchor->GetIdx());
  756. REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(src_anchor, dst_anchor), "Failed add edge from %s:%d to %s:%d.",
  757. src_anchor->GetOwnerNode()->GetName().c_str(), src_anchor->GetIdx(),
  758. dst_anchor->GetOwnerNode()->GetName().c_str(), dst_anchor->GetIdx());
  759. }
  760. for (const auto &src_cluster : control_inputs_) {
  761. auto src_anchor = src_cluster->GetFrameOutControlAnchor();
  762. auto dst_anchor = GetFrameInControlAnchor();
  763. REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(src_anchor, dst_anchor), "Failed add edge from %s:%d to %s:%d.",
  764. src_anchor->GetOwnerNode()->GetName().c_str(), src_anchor->GetIdx(),
  765. dst_anchor->GetOwnerNode()->GetName().c_str(), dst_anchor->GetIdx());
  766. }
  767. return SUCCESS;
  768. }
  769. Status Cluster::BuildPartitionSubgraph() {
  770. if (IsData() || IsNetOutput()) {
  771. return SUCCESS;
  772. }
  773. int64_t parent_node_index = 0;
  774. for (auto anchor : inputs_) {
  775. auto data_op =
  776. MakeShared<OpDesc>(subgraph_->GetName() + std::string("Data_") + std::to_string(parent_node_index), ge::DATA);
  777. REQUIRE_NOT_NULL(data_op, "Failed new memory for data op.");
  778. auto input_desc = anchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(anchor->GetIdx());
  779. REQUIRE_GRAPH_SUCCESS(data_op->AddInputDesc(input_desc), "Failed add input desc.");
  780. REQUIRE_GRAPH_SUCCESS(data_op->AddOutputDesc(input_desc), "Failed add output desc.");
  781. REQUIRE(AttrUtils::SetInt(data_op, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index),
  782. "Failed set parent_node_index on subgraph data node.");
  783. bool is_unknown_shape = IsUnknownShape();
  784. REQUIRE(AttrUtils::SetBool(data_op, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape),
  785. "Failed set _is_unknown_shape flag on data op %s.", data_op->GetName().c_str());
  786. auto data_node = subgraph_->AddNode(data_op);
  787. REQUIRE_NOT_NULL(data_node, "Failed add data node to subgraph.");
  788. REQUIRE_GRAPH_SUCCESS(data_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of data node.");
  789. REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(data_node->GetOutDataAnchor(0), anchor),
  790. "Faile add data input edge to %s:%d", anchor->GetOwnerNode()->GetName().c_str(),
  791. anchor->GetIdx());
  792. parent_node_index++;
  793. }
  794. if (outputs_.empty() && control_outputs_.empty()) {
  795. return SUCCESS;
  796. }
  797. auto net_output_op = MakeShared<OpDesc>(subgraph_->GetName() + "_" + NODE_NAME_NET_OUTPUT, ge::NETOUTPUT);
  798. REQUIRE_NOT_NULL(net_output_op, "Failed new memory for netoutput op.");
  799. bool is_unknown_shape = IsUnknownShape();
  800. REQUIRE(AttrUtils::SetBool(net_output_op, ATTR_NAME_IS_UNKNOWN_SHAPE, is_unknown_shape),
  801. "Failed set _is_unknown_shape flag on net_output_op %s.", net_output_op->GetName().c_str());
  802. for (size_t i = 0; i < outputs_.size(); ++i) {
  803. GeTensorDesc input_desc;
  804. REQUIRE_GRAPH_SUCCESS(net_output_op->AddInputDesc(input_desc), "Failed add input desc.");
  805. }
  806. auto net_output_node = subgraph_->AddNode(net_output_op);
  807. REQUIRE_NOT_NULL(net_output_node, "Failed add netoutput node to subgraph.");
  808. REQUIRE_GRAPH_SUCCESS(net_output_node->SetOwnerComputeGraph(subgraph_), "Failed set owner graph of netoutput node.");
  809. parent_node_index = 0;
  810. for (const auto &anchor : outputs_) {
  811. auto output_desc = anchor->GetOwnerNode()->GetOpDesc()->GetOutputDesc(static_cast<uint32_t>(anchor->GetIdx()));
  812. REQUIRE(AttrUtils::SetInt(output_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_node_index),
  813. "Failed set parent_node_index on subgraph netoutput's input.");
  814. REQUIRE_GRAPH_SUCCESS(net_output_op->UpdateInputDesc(parent_node_index, output_desc),
  815. "Failed update input desc of netoutput node.");
  816. REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(anchor, net_output_node->GetInDataAnchor(parent_node_index)),
  817. "Faile add edge from %s:%d to netoutput node.", anchor->GetOwnerNode()->GetName().c_str(),
  818. anchor->GetIdx());
  819. parent_node_index++;
  820. }
  821. for (const auto &anchor : control_outputs_) {
  822. REQUIRE_GRAPH_SUCCESS(GraphUtils::AddEdge(anchor, net_output_node->GetInControlAnchor()),
  823. "Faile add control edge from %s:%d to netoutput node.",
  824. anchor->GetOwnerNode()->GetName().c_str(), anchor->GetIdx());
  825. }
  826. return SUCCESS;
  827. }
  828. void Cluster::Clear() {
  829. in_clusters_.clear();
  830. out_clusters_.clear();
  831. nodes_.clear();
  832. partitioner_ = nullptr;
  833. inputs_index_.clear();
  834. outputs_index_.clear();
  835. inputs_.clear();
  836. outputs_.clear();
  837. control_inputs_.clear();
  838. control_outputs_.clear();
  839. partition_node_.reset();
  840. subgraph_.reset();
  841. unique_id_ = 0;
  842. }
// Monotonic counter used to name generated subgraphs and PartitionedCall
// ops; thread_local, so partitioners running on different threads do not
// share (or race on) the counter.
thread_local size_t Cluster::unique_id_ = 0;
  844. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示