
graph_mem_assigner.cc (80 kB)

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"

#include <cstring>
#include <set>

#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"

namespace {
const int kDataOutputIndex = 0;
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
const size_t kVirtualInputNodeOutputSize = 1;
const size_t kVirtualOutputNodeInputSize = 1;
const size_t kVirtualNodeDataIndex = 0;
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
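
// Returns the output offset recorded for the symbol that output anchor (node, i) maps to,
// or ge::kInvalidOffset when the anchor has no symbol or the recorded index is out of range.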
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace

namespace ge {
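// Assigns fixed memory for Constant ops first and then for Variable ops on the compute graph.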
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
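
// Runs the hybrid block assigner to lay out feature-map memory, records the resulting HBM
// (and optional P2P) offsets, then assigns variable memory for the graph.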
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_));
  // new (std::nothrow) may return nullptr; check before use.
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc HybridMemAssigner failed.");
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "Memory assigner failed");
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  if (mem_assigner->GetP2PMemOffset() > 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}

ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
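
// Computes the per-batch slice size (output_mem_size) of a no-padding continuous tensor by
// collapsing all dims before dim_index into batch_dim_num, and returns the size of the
// complete tensor in out_size.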
ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                                                  int64_t dim_index, int64_t &output_mem_size,
                                                                  int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Opdesc GetSize failed!");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    std::string error = "Invalid value" + FmtToStr(dim_index) +
        " of attr _reuse_input_on_dim_index, which is out of data range [0," +
        std::to_string(output_dims.size()) + ")";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
    return FAILED;
  }
  if (output_mem_size < 0) {
    std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) +
        " is out of data range [0," + std::to_string(INT64_MAX) + "]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  return SUCCESS;
}
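
// Scans one group of multi-batch virtual nodes and records in max_batch_label the
// ATTR_NAME_BATCH_LABEL of the node whose reused tensor has the largest batch dim.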
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);
      if (i == 0) {
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          std::string error = "The shape of several nodes between multiple batches does not match.";
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          if (max_batch_dim_find && max_batch_dim != j) {
            std::string error = "The shape of several nodes between multiple batches does not match.";
            GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}
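
// Top-level reassignment pass: handles continuous, no-padding reuse and atomic memory,
// then checks the summed offsets against the graph memory limit.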
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage(
          "E19022", {"memType", "size", "item", "maxsize"},
          {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
           std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
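
// Lays out all zero-copy blocks at the current HBM offset and reports the size of the
// zero-copy section via zero_mem_copy_size.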
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type[HBM]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
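
// Walks every node and assigns continuous input memory (plus atomic-clean attrs when all
// inputs are atomic) and continuous output memory for non-reference nodes.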
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    // Get the continuous input type of the node, default is false
    bool is_input_continuous = false;
    GE_CHECK_NOTNULL(node->GetOpDesc());
    // If GetBool fail, is_input_continuous is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
    // Assign continuous input memory
    if (is_input_continuous) {
      int64_t memory_type = RT_MEMORY_HBM;
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }
      // Clean up atomic address, e.g. for hcom nodes
      vector<int32_t> input_indexes;
      // If GetListInt fail, input_indexes is empty.
      (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // Check whether there is an atomic conflict between the current node and the peer out node
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        }
        const auto &in_control_anchor = node->GetInControlAnchor();
        GE_CHECK_NOTNULL(in_control_anchor);
        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
            if (ret != SUCCESS) {
              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
              return ret;
            }
          }
        }
      }
    }
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // Get the continuous output type of the node, default is false
    bool is_output_continuous = false;
    // If GetBool fail, is_output_continuous is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
    // If the output is ref type and refers to the ref of an input, the name of the output
    // and the input are the same. GE encounters the ref type, finds the matching relationship according
    // to the names of input and output, and allocates the same memory address, e.g. HCOMBroadcast.
    if (!is_ref && is_output_continuous) {  // Assign continuous output memory
      ret = AssignContinuousOutputMemory(node);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign reference memory failed!");
        return ret;
      }
    }
  }
  for (auto pair : memory_offset_) {
    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
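
// Packs the outputs feeding this node into one continuous region. Depending on
// ATTR_NAME_CONTINUOUS_INPUT_ALLOC, it either reuses the peers' existing offsets or advances
// the global offset for this memory type, reporting the region via
// continuous_mem_start/continuous_mem_size.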
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size, int64_t memory_type) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  bool continuous_input_alloc = false;
  (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  continuous_mem_start = iter->second.mem_offset_;
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fail, is_peer_output_continuous is false.
    (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
    // Get peer node output size. If size == 1 (the peer node has only one output), the continuous input of
    // this node and the continuous output of the previous node are the same, so we can support it. If
    // size != 1, there may be a conflict between the two, so we cannot support it.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                        " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                        " requires continuous output. There may be a conflict between the two." +
                        " This node is not supported now.";
                    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                    return PARAM_INVALID;);
    bool is_peer_reference = false;
    // If GetBool fail, is_peer_reference is false.
    (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    GE_IF_BOOL_EXEC(is_peer_reference,
                    std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                        " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                        " is a reference op. There may be a conflict between the two." +
                        " This node is not supported now.";
                    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                    return PARAM_INVALID;);
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // Cannot use "else if" here, in case the node has only one input.
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
            0, 0);
        continue;
      }
      output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_;
    } else {
      std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = iter->second.mem_offset_;
    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        iter->second.mem_offset_ += offset_for_fusion;
      } else {
        std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) +
            " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
      iter->second.mem_offset_ += tensor_desc_size;
    }
    // If tensor_actual_size is set, memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
        peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(),
        (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size);
  }
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}
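
// Lays the node's outputs out back to back starting from the offset already assigned to
// output 0, aligning each tensor to MEM_ALIGN_SIZE.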
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }
  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    if (mem_offset <= 0) {
      return FAILED;
    }
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
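
// Places a no-padding, input-reusing virtual node at mem_offset_reuse: its output starts
// there, each input's peer output is laid out back to back by its real (per-batch) slice
// size, and the extra space for the complete tensors is appended at the end.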
Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  output_list.at(0) = mem_offset_reuse;
  op_desc->SetOutputOffset(output_list);
  GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }
  size_t extra_memory_size = 0;
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
      GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return ge::FAILED;
    }
    output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
    peer_op_desc->SetOutputOffset(output_offsets);
    size_t pre_mem_offset = mem_offset_reuse;
    // Calculate tensor real size of each piece of data and out size of complete data
    ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
           "real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
           peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size,
           output_mem_size);
  }
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}
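
// Collects virtual input nodes carrying the no-padding continuous and reuse-input attrs.
// Nodes without a batch label are reassigned immediately; multi-batch nodes are grouped by
// name prefix and handled together in ReAssignVirtualNodesMemory.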
Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
    if (attr_reuse && attr_continuous) {
      if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
        // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse.
        std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_input_nodes;
        if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
        }
        parallel_virtual_input_nodes.emplace_back(n);
        mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
      }
    }
  }
  int32_t mem_reuse_model = 0;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
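
// Counterpart of ReAssignVirtualInputNodeMemory for output-reusing virtual nodes: the single
// input tensor and all outputs are rebased onto mem_offset_reuse, which is advanced by the
// real slice sizes plus the extra memory for the complete tensors.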
Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  // 1. Set memory of the input tensor to be reused.
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  // 2. Set memory of the output tensors.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }
  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;
    // Calculate tensor real size of each piece of data and out size of complete data
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             op_desc->GetName().c_str(), out_data_anchor->GetIdx());
      return FAILED;
    }
    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}
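
// Output-side counterpart of ReAssignReuseAndNoPaddingContinuousInputMemory: collects virtual
// output nodes, reassigns unlabeled ones directly and groups multi-batch ones by name prefix.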
Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
    if (attr_reuse && attr_continuous) {
      auto in_data_anchor_list = n->GetAllInDataAnchors();
      if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
        // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse.
        std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(in_data_anchor_list.size()) + " inputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) +
              " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName());
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_output_nodes;
        if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
        }
        parallel_virtual_output_nodes.emplace_back(n);
        mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
      }
    }
  }
  int32_t mem_reuse_model = 1;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
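
// Two-pass reuse across batches: first assign fresh memory to each group's max-batch node,
// then rebase every node in the group onto the group's recorded start offset so all batch
// variants share the same region.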
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Find max batch label value
  string max_batch_label;
  GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label),
                    "Get max batch label failed.");
  PrintMemoryOffset();
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    int64_t memory_type = RT_MEMORY_HBM;
    GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type),
                      "Get node list memory type failed.");
    auto iter = memory_offset_.find(memory_type);
    if (iter == memory_offset_.end()) {
      std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    size_t max_batch_node_mem_offset = iter->second.mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }
  PrintMemoryOffset();
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
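
// Assigns memory for all atomic nodes: nodes cleaned by a shared AtomicAddrClean op get one
// common cleaned range recorded on that op, while atomic nodes that connect to NetOutput are
// handled separately with independent clean attributes.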
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  map<NodePtr, vector<NodePtr>> normal_atomic_and_clean_nodes_map;
  vector<NodePtr> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &iter : normal_atomic_and_clean_nodes_map) {
    int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
    GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
    for (auto &atomic_node : iter.second) {
      vector<int64_t> mem_offset_end;
      status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
      if (status != SUCCESS) {
        GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
               atomic_node->GetName().c_str());
        return status;
      }
    }
    int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
    if (atomic_mem_size != 0) {
      GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}),
                        "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
    }
  }
  if (AssignConnectNetOutputAtomicMemory(connecting_output_atomic_nodes) != SUCCESS) {
    GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
    return FAILED;
  }
  return SUCCESS;
}
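
// Splits the atomic nodes controlled by each AtomicAddrClean op into two sets: ordinary
// atomic nodes (grouped per clean op) and atomic nodes that connect to the graph output,
// rejecting any node that carries both the atomic and reference attributes.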
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(map<NodePtr, vector<NodePtr>> &normal_atomic_nodes_map,
                                                             vector<NodePtr> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      vector<NodePtr> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fail, is_atomic_node is false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fail, is_reference is false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) +
                    " cannot have both atomic and is_reference attribute.";
                GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                return ge::PARAM_INVALID;
              }
              vector<int> is_connecting_output;
              // If GetListInt fail, is_connecting_output is an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes.emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes.emplace_back(peer_in_node);
              tmp_normal_atomic_nodes.clear();
              break;
            }
          }
        }
      }
      if (!tmp_normal_atomic_nodes.empty()) {
        normal_atomic_nodes_map[node] = tmp_normal_atomic_nodes;
      }
    }
  }
  return SUCCESS;
}
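
// Assigns memory for an atomic node's outputs and then, when EXT_ATTR_ATOMIC_WORKSPACE_INFO
// is present, for its workspaces, dispatching to the fusion or ordinary workspace path.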
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(node_op_desc);
  // Assign atomic node output memory
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
    return ret;
  }

  // Check and assign atomic node workspace memory
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node remains false.
    (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign workspace memory for a single ordinary atomic node (fusion nodes excluded)
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
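
/// Assign memory for atomic nodes whose outputs connect directly to a graph output.
/// Each such node is served by its own atomic_addr_clean op, so after assigning output
/// and workspace memory the clean attr is set per node via SetIndependentAtomicAttr.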
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }

    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }

    // Every such atomic node uses its own atomic_addr_clean op, so the attr is set separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Failed to set atomic attr separately.");
      return FAILED;
    }
  }
  return SUCCESS;
}
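
/// For ops marked with ATTR_NAME_REFERENCE, an output that shares its name with an input
/// reuses that input's memory: the output offset is copied from the peer output that
/// feeds the same-named input, so no fresh memory is assigned for it.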
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference attribute of the node; the default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref remains false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is greater than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }

    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }

    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }

    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
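
/// Check whether a node's data inputs allow atomic processing: an input produced by a
/// Constant, AippData or Variable op is rejected, presumably because such memory is
/// managed elsewhere and must not be cleared by an atomic-addr-clean pass.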
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
          FmtToStr(peer_op_desc->GetName()) +
          " is of type Constant/AippData/Variable, which is not supported as the input of an atomic node";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return false;
    }
  }
  return true;
}
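
/// Assign HBM offsets to the outputs listed in ATOMIC_ATTR_OUTPUT_INDEX. Outputs whose
/// atomic memory was already assigned while handling a downstream consumer (see
/// GetMemoryAssignmentStatus) are skipped. After each assignment the global offset is
/// advanced by the tensor size, aligned to MEM_ALIGN_SIZE, and recorded in mem_offset_end.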
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index remains empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);

  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) +
        "'s size of atomic_output_index is greater than the size of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
          " is greater than the size" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }

    // If the input of a cascade op needs its atomic addr cleared, there is no need to clear it separately here.
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }

    // If an atomic address has already been assigned, skip it; it must not be reassigned.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }

    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGW("Failed to get size of output %ld of node %s.", output_index, op_desc->GetName().c_str());
    }

    output_list[output_index] = iter->second.mem_offset_;
    GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].",
           compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index,
           iter->second.mem_offset_, op_desc->GetStreamId(), size, size);

    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }

  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
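
/// An output's atomic memory counts as already assigned when some consumer of it carries
/// ATOMIC_ATTR_INPUT_INDEX whose first element is kAllInputAddrIsAtomic (-1), meaning the
/// address was handled while laying out that consumer's continuous inputs.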
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
        " is greater than the size of the node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the atomic-input attr of the consumer op; atomic_input_index[0] == -1 indicates
    /// that the atomic address has already been assigned.
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
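
/// workspace_info maps op name -> (workspace index -> workspace size). For an ordinary
/// atomic node the entries must belong to op_desc itself; each listed workspace slot is
/// rebased to the current HBM offset and the offset is advanced by the slot's size.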
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();

  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }

    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index" + FmtToStr(workspace_index) +
            " is greater than the size" + FmtToStr(workspace_vector.size()) + " of the workspace vector.";
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }

      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "size[%ld] real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);

      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
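
/// Fusion variant: workspace_info may describe several fused sub-ops, so rather than
/// patching the workspace vector directly, the per-sub-op (index -> offset) map is stored
/// back on the fused op as EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.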
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;

  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }

    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;

      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
          "real_size[%ld].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size);

      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
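
/// Verify that no input, output or workspace offset was left at ge::kInvalidOffset after
/// assignment. For IDENTITY / READVARIABLEOP nodes the output offset is additionally
/// re-synced with the ref-symbol mapping, since those nodes may alias another anchor's
/// memory.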
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }

    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }

    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
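
/// Final pass after memory assignment: log the total offset per memory type, then refresh
/// every node's input offsets from its peers' output offsets.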
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return FAILED;
  }
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "Update op input offset failed");
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}
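
/// For a subgraph Data node (one carrying ATTR_NAME_PARENT_NODE_INDEX): if the matching
/// parent input is a constant, reuse the constant's output offsets; for dynamic-shape
/// subgraphs the memory is already allocated, so nothing is done; otherwise inherit the
/// parent node's input offset at parent_index.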
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }

  // Subgraph Data node: check for a constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;  // Constant input.
  }

  // Memory has already been allocated for dynamic-shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }

  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get parent input offset failed, node is " + FmtToStr(node->GetName()) +
        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }

  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
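
/// Build input_list for an ordinary node from its peers' output offsets. When the op
/// carries ATTR_NAME_INPUT_MEM_TYPE_LIST (the fusion case), the recorded original offset
/// is combined with the peer offset (L1 inputs keep their original offset unchanged);
/// for constant peers the offset stored in the input tensor descriptor is used instead.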
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }

    // The peer node's OutputOffset is used to update input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) +
              " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
              FmtToStr(mem_type_size) + " and from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        // L1 memory: keep the original input offset.
        // HBM: input offset = original input offset + peer output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index]
                        : origin_input_list[valid_input_index] + output_list.at(out_index));
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }

      GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "",
             tmp_op_desc->GetName().c_str(),
             valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
             out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }

      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is a broadcast node and the preceding node is a variable, the broadcast
      // node's own InputOffset (already set in AssignVarAttr2Nodes) is used to update input_list.
      // Otherwise, the preceding node's OutputOffset is used.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
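
/// Derive per-block (start, size) pairs from the start offset plus the end offsets that
/// were recorded during assignment, then push them onto the ATOMICADDRCLEAN op that
/// controls this node. Illustrative example: atomic_mem_start = 0 with
/// mem_offset_end = {512, 1024} yields starts {0, 512} and sizes {512, 512}.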
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);

  // Parse the offset and size vectors: mem_offset_end[i] is both the end of block i and the start
  // of block i + 1, so the size of block i is memory_offset_start[i + 1] - memory_offset_start[i].
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();

  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }

      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
          GELOGE(FAILED, "Set atomic clean attr failed.");
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
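
/// Append the (start, size) pairs to the clean op's workspace and workspace-bytes vectors
/// and to its ATTR_NAME_AUTOMIC_ADD_START / ATTR_NAME_AUTOMIC_ADD_MEM_SIZE list attrs
/// (note: "AUTOMIC" is the attribute's actual spelling), extending any values already set.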
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);

    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector remains empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);

    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector remains empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);

    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();

    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]",
           node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
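
/// Round the current offset of the given memory type up to the next multiple of
/// mem_align_size via the usual (x + align - 1) / align * align formula; for example,
/// with mem_align_size = 512 an offset of 1000 becomes (1000 + 511) / 512 * 512 = 1024.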
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset map does not have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, all nodes share the same memory attributes,
  // so the type of the first matching node is used.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      break;
    }
    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      break;
    }
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }

  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of mem type list is not equal to the size of in data anchor" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }

  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "Check continuous memory type failed.");
    return FAILED;
  }
  // The memory is continuous and all entries share the same type, so use the first one.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
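
/// A continuous-memory block is considered valid only if every entry of mem_type_list is
/// the same memory type and that type is present in memory_offset_.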
bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.empty()) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the types of the input memory are inconsistent. They are " +
          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the types of the input memory are inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Called after memory of max-batch nodes with the same batch label has been assigned;
    // print the resulting offset for each memory type.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}
}  // namespace ge

The Graph Engine (GE) is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph-optimization operations on it, and finally outputs a graph that can run efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor so as to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.