
graph_mem_assigner.cc 82 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"

namespace {
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
const size_t kVirtualInputNodeOutputSize = 1;
const size_t kVirtualOutputNodeInputSize = 1;
const size_t kVirtualNodeDataIndex = 0;
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
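
// Resolves the output offset assigned to the memory-reuse symbol that output anchor i of `node`
// belongs to. Returns ge::kInvalidOffset if the anchor maps to no symbol or the symbol's anchor
// list carries no usable offset.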
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace

namespace ge {
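// Assigns memory to Constant ops first and then to Variable ops; both steps delegate to
// VarMemAssignUtil, and any failure status is propagated unchanged.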
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
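
// Entry point for graph-level memory assignment: run the hybrid (block/priority) assigner for
// feature-map memory, record the resulting HBM offset (and the P2P offset when present), then
// assign variable memory through VariableMemoryAssigner.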
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  // new(std::nothrow) returns nullptr on allocation failure, so check before first use.
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc HybridMemAssigner failed.");
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "Memory assigner failed");
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  if (mem_assigner->GetP2PMemOffset() >= 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}

ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
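
// For a tensor that is reused across batches on dimension `dim_index`, computes both the real
// size of one batch slice (output_mem_size) and the size of the complete tensor (out_size).
// The dims in front of dim_index are folded into batch_dim_num; e.g. shape {8, 3, 224, 224}
// with _reuse_input_on_dim_index = 1 yields batch_dim_num = 8 and a per-slice shape of
// {1, 3, 224, 224}.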
ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                                                  int64_t dim_index, int64_t &output_mem_size,
                                                                  int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Opdesc GetSize failed!");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    std::string error = "Invalid value" + FmtToStr(dim_index) +
        " of attr _reuse_input_on_dim_index, which is out of data range [0," +
        std::to_string(output_dims.size()) + ")";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
    return FAILED;
  }
  if (output_mem_size < 0) {
    std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) +
        " is out of data range [0," + std::to_string(INT64_MAX) + "]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  return SUCCESS;
}
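
// Scans one group of parallel virtual nodes and returns, via max_batch_label, the
// ATTR_NAME_BATCH_LABEL of the node whose reused tensor has the largest shape. The shapes of
// all batches must match except in a single (batch) dimension.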
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);
      if (i == 0) {
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          std::string error = "The shape of several nodes between multiple batches does not match.";
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          if (max_batch_dim_find && max_batch_dim != j) {
            std::string error = "The shape of several nodes between multiple batches does not match.";
            GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}
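
// Runs the reassignment passes in order: continuous inputs/outputs, no-padding reuse for
// virtual input and output nodes, then atomic memory. Afterwards the summed offsets of all
// memory types are checked against the session's configured graph memory limit.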
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
                                                      {std::to_string(iter.first), std::to_string(iter.second),
                                                       "featuremap",
                                                       std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
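
// Places the zero-copy blocks collected by the priority assigner at the current end of the
// HBM feature-map section and reports, via zero_mem_copy_size, how many bytes were appended.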
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type[HBM]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
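
// Walks all nodes and re-lays tensors of ops marked ATTR_NAME_CONTINUOUS_INPUT or
// ATTR_NAME_CONTINUOUS_OUTPUT into contiguous ranges. For continuous inputs whose addresses
// are all atomic, the covered range is also wired into the preceding AtomicAddrClean node.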
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    // Get the continuous input type of the node, default is false.
    bool is_input_continuous = false;
    GE_CHECK_NOTNULL(node->GetOpDesc());
    // If GetBool fails, is_input_continuous is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);
    // Assign continuous input memory.
    if (is_input_continuous) {
      int64_t memory_type = RT_MEMORY_HBM;
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }
      // Clean up atomic addresses, e.g. for hcom nodes.
      vector<int32_t> input_indexes;
      // If GetListInt fails, input_indexes is empty.
      (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // Check whether there is an atomic conflict between the current node and the peer out node.
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        }
        const auto &in_control_anchor = node->GetInControlAnchor();
        GE_CHECK_NOTNULL(in_control_anchor);
        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size});
            if (ret != SUCCESS) {
              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
              return ret;
            }
          }
        }
      }
    }
    // Get the reference type of the node, default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // Get the continuous output type of the node, default is false.
    bool is_output_continuous = false;
    // If GetBool fails, is_output_continuous is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);
    // If the output is ref type and refers to the ref of an input, the output and the input
    // share the same name. When GE encounters a ref type, it finds the matching relationship
    // according to the names of input and output and allocates the same memory address,
    // e.g. HCOMBroadcast.
    if (!is_ref && is_output_continuous) {  // Assign continuous output memory.
      ret = AssignContinuousOutputMemory(node);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous output memory failed!");
        return ret;
      }
    }
  }
  for (auto pair : memory_offset_) {
    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
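
// Packs the input tensors of `node` back to back starting at the current offset of
// `memory_type`, reporting the covered range through continuous_mem_start/continuous_mem_size.
// Unless the peer op carries ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, each tensor is padded up to a
// multiple of MEM_ALIGN_SIZE; e.g. with a 512-byte MEM_ALIGN_SIZE, a 1000-byte tensor would
// occupy 1024 bytes.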
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size, int64_t memory_type) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  bool continuous_input_alloc = false;
  (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  continuous_mem_start = iter->second.mem_offset_;
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fails, is_peer_output_continuous is false.
    (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
    // Get the peer node's output size. If size == 1 (the peer node has only one output), the continuous
    // input of this node and the continuous output of the previous node are the same and we can support
    // it. If size != 1, there may be a conflict between the two, and we cannot support it.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                        " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                        " requires continuous output. There may be a conflict between the two. " +
                        "This node is not supported now.";
                    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                    return PARAM_INVALID;);
    bool is_peer_reference = false;
    // If GetBool fails, is_peer_reference is false.
    (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    GE_IF_BOOL_EXEC(is_peer_reference,
                    std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                        " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                        " is a reference op. There may be a conflict between the two. " +
                        "This node is not supported now.";
                    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                    return PARAM_INVALID;);
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // Cannot use "else if" here, in case the node has only one input.
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
            0, 0);
        continue;
      }
      output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_;
    } else {
      std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = iter->second.mem_offset_;
    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        iter->second.mem_offset_ += offset_for_fusion;
      } else {
        std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) +
            " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);
      iter->second.mem_offset_ += tensor_desc_size;
    }
    // If tensor_actual_size is set, memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
        peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(),
        (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size);
  }
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }
  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    if (mem_offset <= 0) {
      return FAILED;
    }
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
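
// Rewrites the offsets of a "virtual" input node, i.e. one whose single output reuses the
// memory of its continuous, no-padding inputs: every input tensor is placed at
// mem_offset_reuse, which advances by the real (per-batch) size of each input; the padding up
// to the inputs' full sizes is appended once at the end.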
Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  output_list.at(0) = mem_offset_reuse;
  op_desc->SetOutputOffset(output_list);
  GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }
  size_t extra_memory_size = 0;
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
      GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return ge::FAILED;
    }
    output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
    peer_op_desc->SetOutputOffset(output_offsets);
    size_t pre_mem_offset = mem_offset_reuse;
    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
           "real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
           peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size,
           output_mem_size);
  }
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
    if (attr_reuse && attr_continuous) {
      if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
        // When the current virtual node has several outputs, it cannot be directly determined
        // which tensor is the one for reuse.
        std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: for dynamic multi-batch nodes, memory needs to be reused.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_input_nodes;
        if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
        }
        parallel_virtual_input_nodes.emplace_back(n);
        mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
      }
    }
  }
  int32_t mem_reuse_model = kVirtualInputNodeMemoryReuse;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
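
// Counterpart of ReAssignVirtualInputNodeMemory for virtual output nodes: the single reused
// input tensor and all output tensors are placed at mem_offset_reuse, which advances by the
// real size of each output plus, at the end, the accumulated padding to the full sizes.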
Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  // 1. Set memory of the input tensor to be reused.
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);
  // 2. Set memory of the output tensors.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }
  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;
    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             op_desc->GetName().c_str(), out_data_anchor->GetIdx());
      return FAILED;
    }
    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;
    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  int64_t memory_type = RT_MEMORY_HBM;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);
    if (attr_reuse && attr_continuous) {
      auto in_data_anchor_list = n->GetAllInDataAnchors();
      if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
        // When the current virtual node has several inputs, it cannot be directly determined
        // which input is the tensor for reuse.
        std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) +
            " has " + FmtToStr(in_data_anchor_list.size()) + " inputs.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      auto iter = memory_offset_.find(memory_type);
      if (iter == memory_offset_.end()) {
        std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.",
             iter->second.mem_offset_, memory_type);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL; no need to check the return value, only the out parameter.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = iter->second.mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: for dynamic multi-batch nodes, memory needs to be reused.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) +
              " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName());
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_output_nodes;
        if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
        }
        parallel_virtual_output_nodes.emplace_back(n);
        mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
      }
    }
  }
  int32_t mem_reuse_model = kVirtualOutputNodeMemoryReuse;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}
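
// Two passes over each group of parallel multi-batch virtual nodes: first assign fresh memory
// to the max-batch node of every group and remember the group's start offset, then point every
// remaining batch's node at that same offset, so all batches of a group share one buffer.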
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Find the max batch label value.
  string max_batch_label;
  GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label),
                    "Get max batch label failed.");
  PrintMemoryOffset();
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    int64_t memory_type = RT_MEMORY_HBM;
    GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type),
                      "Get node list memory type failed.");
    auto iter = memory_offset_.find(memory_type);
    if (iter == memory_offset_.end()) {
      std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    size_t max_batch_node_mem_offset = iter->second.mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);
    for (auto &i_node : virtual_nodes_list) {
      // Op_desc is not nullptr, it has been checked.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
          GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }
        iter->second.mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE, memory_type);
        GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.",
               iter->second.mem_offset_, memory_type);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }
  PrintMemoryOffset();
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model);
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}
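
// Assigns atomic output and workspace memory per batch label. All batches are laid out from
// the same start offset, so their ranges overlap (the batch branches are presumably mutually
// exclusive at runtime), and the final offset is the maximum over all batches. The clean range
// is attached to the AtomicAddrClean node only for the max-batch label.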
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // Key: dynamic batch label (batch name).
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reassign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      GE_CHECK_NOTNULL(mem_assigner_);
      GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger());
      if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}),
                          "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fails, is_atomic_node is false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fails, is_reference is false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) +
                    " cannot have both atomic and is_reference attribute.";
                GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                return ge::PARAM_INVALID;
              }
              std::string batch_label;
              (void) ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetListInt fails, is_connecting_output is an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  // Assign atomic node output memory
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
    return ret;
  }
  // Check and assign atomic node workspace memory
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node stays false.
    (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign a single ordinary atomic node's workspace memory, not including fusion nodes
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
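
// Assigns atomic memory for nodes whose outputs connect directly to NetOutput. Each node gets
// its own region starting at the current HBM offset, and its dedicated AtomicAddrClean node is
// configured for that region via SetIndependentAtomicAttr.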
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }
    // Each of these atomic nodes uses its own atomic_addr_clean op, so the attr is set per node.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Failed to set atomic attr separately.");
      return FAILED;
    }
  }
  return SUCCESS;
}
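
// For reference ops (an output reuses an input buffer), copies the producing node's output
// offset into the matching output offset, pairing outputs with inputs by anchor name.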
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fails, is_ref stays false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
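
// Returns false when any data input of the node is produced by a Constant/AippData/Variable op;
// atomic memory assignment does not support those producers.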
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
          FmtToStr(peer_op_desc->GetName()) +
          " is invalid: atomic memory assignment does not support Constant/AippData/Variable inputs";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return false;
    }
  }
  return true;
}
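
// Assigns HBM offsets for the outputs listed in ATOMIC_ATTR_OUTPUT_INDEX. Outputs whose atomic
// address was already assigned along the continuous-memory path are skipped; each newly
// assigned region's end offset is appended to mem_offset_end.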
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index stays empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) +
        "'s size of atomic_output_index is more than the size of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
          " is more than the size" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here.
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }
    // If an atomic address has already been assigned, skip it; there is no need to reassign.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed.");
    }
    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld]"
           " batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index,
           iter->second.mem_offset_, op_desc->GetStreamId(), size, size, batch_label.c_str());
    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
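
// Checks whether the atomic memory for the given output has already been assigned: if any
// consumer's ATOMIC_ATTR_INPUT_INDEX starts with kAllInputAddrIsAtomic, the address was
// already handled on the continuous-memory path.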
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
        " is more than the size of node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the input atomic attr of the peer output op; atomic_input_index[0] == -1 indicates
    /// that the atomic address has already been assigned.
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
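
// Lays out atomic workspaces for a single (non-fusion) node: each workspace index in
// workspace_info gets the current HBM offset written into the op's workspace vector.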
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index" + FmtToStr(workspace_index) +
            " is more than the size" + FmtToStr(workspace_vector.size()) + " of workspace vector.";
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }
      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size,
          batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
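
// Lays out atomic workspaces for a fusion node: offsets are grouped per sub-node name and
// stored in EXT_ATTR_ATOMIC_WORKSPACE_OFFSET instead of the op's workspace vector.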
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
          "real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size,
          batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
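
// Validates that no assigned input/output/workspace offset is still kInvalidOffset, and
// re-syncs the output offsets of Identity/ReadVariableOp nodes with their ref-mapping symbol.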
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }
    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        GELOGE(FAILED, "Invalid workspace in node: %s workspace: %ld.", node->GetName().c_str(), ge::kInvalidOffset);
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
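
// Logs the final offset of every memory pool and refreshes the input offsets of all nodes from
// their producers' output offsets.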
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return FAILED;
  }
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "Update op input offset failed");
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
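
// Resolves the real producer for a known-shape subgraph Data node (one carrying
// ATTR_NAME_PARENT_NODE_INDEX) through its parent input; dynamic-shape and ordinary nodes are
// returned unchanged.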
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}
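
// For a subgraph Data node: if its parent input is a constant, reuse the constant's output
// offset; otherwise (known-shape case) inherit the offset from the parent node's input list.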
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }
  // Subgraph Data Node, check for constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;  // Constant input.
  }
  // Memory allocated for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }
  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get Parent input offset failed, node is " + FmtToStr(node->GetName()) +
        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
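
// Rebuilds input_list for an ordinary node from each producer's output offset. For fusion nodes
// carrying ATTR_NAME_INPUT_MEM_TYPE_LIST, inputs marked RT_MEMORY_L1 keep their original offset
// while the rest add the producer's output offset to it; constant inputs take the tensor's data
// offset instead.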
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }
    // If the current node is not broadcast, the OutputOffset of the previous node is used to update the input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) +
              " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
              FmtToStr(mem_type_size) + " from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        // RT_MEMORY_L1 inputs keep the original input offset;
        // otherwise, input offset = original input offset + output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1 ? origin_input_list[valid_input_index]
                        : origin_input_list[valid_input_index] + output_list.at(out_index));
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }
      GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "",
             tmp_op_desc->GetName().c_str(),
             valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
             out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
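
// Dispatches the input-offset refresh by node type: broadcast ops may take offsets already set
// on themselves (variable inputs), Data nodes go through UpdateConstArgsOffset, and all other
// nodes use the producer-output path above.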
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is broadcast and the preceding node is a variable, the broadcast node's own
      // InputOffset (already set in AssignVarAttr2Nodes) is used to update input_list.
      // Otherwise, the OutputOffset of the previous node is used to update input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
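
// Converts the [atomic_mem_start, mem_offset_end...] boundaries into per-region (start, size)
// pairs and pushes them onto the node's AtomicAddrClean control predecessor. E.g. with start 0
// and mem_offset_end {512, 1024}, the regions are (0, 512) and (512, 512).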
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  // Parse the region boundaries into offset and size vectors.
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    // The size of region i is the distance between consecutive start offsets.
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
          GELOGE(FAILED, "Set atomic clean attr failed.");
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
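
// Appends the regions to clean onto the AtomicAddrClean node: start offsets go into its
// workspace and ATTR_NAME_AUTOMIC_ADD_START, sizes into its workspace bytes and
// ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.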
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);
    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);
    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();
    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]",
           node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
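
// Rounds the current offset of the given memory pool up to the next multiple of mem_align_size,
// e.g. offset 513 with alignment 512 becomes (513 + 511) / 512 * 512 = 1024.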
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset doesn't have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
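
// In the dynamic-batch scenario all nodes in the list share the same memory attributes, so the
// first node matching the reuse model determines the memory type for the whole list.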
ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, the memory attributes of nodes are the same.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      break;
    }
    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      break;
    }
  }
  return SUCCESS;
}
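
// Resolves a single node's memory type from ATTR_NAME_INPUT_MEM_TYPE_LIST or
// ATTR_NAME_OUTPUT_MEM_TYPE_LIST; an empty list falls back to RT_MEMORY_HBM, and a non-empty
// list must be consistent across all entries.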
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }
  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of mem type list is not equal to the size of in data anchor" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "Check continuous memory type failed.");
    return FAILED;
  }
  // The memory is continuous and the type is the same, so use the first entry.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
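
// A continuous-memory type list is valid only if all entries are identical and the shared type
// has a pool registered in memory_offset_.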
bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.empty()) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the type of the input memory is inconsistent. They are " +
          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
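
// Debug helper: dumps the current offset of every memory pool.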
void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Assign memory of max batch nodes that have the same batch label.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph-optimization passes, and finally produces a graph that runs efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core; the detailed architecture diagram is shown below.