
graph_mem_assigner.cc

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "graph/build/memory/buffer_pool_mem_assigner.h"
namespace {
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
const int kPrevNextDistanceNum = 2;
const int64_t kInvalidStream = -1;
const char *const kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE";
// Each state occupies its own bit, so the flags can be combined without overlap.
enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 };
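// Illustrative: a node with both continuous input and continuous output carries
// continuous_type == (kTypeInput | kTypeOutput) == 5; each property is then tested
// independently with a bitwise AND, e.g. (continuous_type & kTypeInput) != 0.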
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace
namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
  Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_));
  if ((mem_assigner == nullptr) || (mem_assigner->Assign() != ge::SUCCESS)) {
    GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  for (auto pair : mem_assigner->GetMemOffsets()) {
    MemoryOffset offset(pair.first, pair.second);
    memory_offset_.emplace(pair.first, offset);
  }
  // The base memory type offset must exist.
  auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM);
  if (it == mem_assigner->GetMemOffsets().end()) {
    MemoryOffset memory_offset(RT_MEMORY_HBM, 0);
    memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  }
  it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR);
  if (it == mem_assigner->GetMemOffsets().end()) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0);
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
                      "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                             int64_t dim_index, int64_t &output_mem_size,
                                             int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "[Get][TensorSize]");
    REPORT_CALL_ERROR("E19999", "Get tensor size failed");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
                       dim_index, output_dims.size(), output_shape.ToString().c_str());
    GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
           dim_index, output_dims.size(), output_shape.ToString().c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "[Calc][TensorSize]");
    return FAILED;
  }
  if (output_mem_size < 0) {
    REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
                       "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
                       output_mem_size, output_shape.ToString().c_str(),
                       TypeUtils::FormatToSerialString(out_format).c_str(),
                       TypeUtils::DataTypeToSerialString(data_type).c_str());
    GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
           "maybe has dynamic shape",
           output_mem_size, output_shape.ToString().c_str(),
           TypeUtils::FormatToSerialString(out_format).c_str(),
           TypeUtils::DataTypeToSerialString(data_type).c_str());
    return FAILED;
  }
  return SUCCESS;
}
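// Worked example (illustrative): for shape {4, 2, 3} and dim_index = 1, the loop above folds the
// leading dimension into batch_dim_num = 4 and rewrites the shape as {1, 2, 3}, so output_mem_size
// holds the size of one batch slice while out_size keeps the size of the complete tensor.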
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
                       compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph),
                    "[ReAssign][ContinuousMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph),
                    "[ReAssign][AtomicMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  GE_CHK_STATUS_RET(AssignBufferPoolMemory(),
                    "[Assign][BufferPoolMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
           "graph_id:%u, graph_name:%s, reducing the batch size or scaling down the model may solve the problem",
           total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
          {std::to_string(iter.first), std::to_string(iter.second), "featuremap",
           std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
      GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
              iter.second, iter.first);
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  if (priority_assigner == nullptr) {
    REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s",
                       compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return ge::FAILED;
  }
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // Set the offset for each zero-copy block.
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // Set the offset for zero-copy nodes.
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  if (op_desc == nullptr) {
    return 0;
  }
  bool is_continuous = false;
  uint32_t continuous_type = 0;
  // If GetBool fails, is_continuous is false.
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeInput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeInputNoPadding;
      }
    }
  }
  is_continuous = false;
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeOutput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeOutputNoPadding;
      }
    }
  }
  if (continuous_type != 0) {
    GELOGI("[Get][MemType:Continuous]Current node %s, value is %d", op_desc->GetName().c_str(), continuous_type);
  }
  return continuous_type;
}
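// For intuition, the attribute combinations recognized above are:
//   ATTR_NAME_CONTINUOUS_INPUT == true                                   -> kTypeInput
//   ATTR_NAME_NOPADDING_CONTINUOUS_INPUT && ATTR_NAME_OUTPUT_REUSE_INPUT -> kTypeInputNoPadding
// and symmetrically on the output side for kTypeOutput / kTypeOutputNoPadding.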
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                     int64_t &tensor_size, int64_t &nopadding_size) {
  if ((op_desc == nullptr) || (output_desc == nullptr)) {
    REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, not expected");
    GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
    return FAILED;
  }
  tensor_size = 0;
  nopadding_size = 0;
  bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    int64_t attr_dim_index;
    bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
    if (!get_attr_dim_flag) {
      REPORT_INNER_ERROR("E19999", "Get Attr:%s failed, op_name:%s",
                         ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
             ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
      return FAILED;
    }
    // Calculate the real size of each piece of data and the size of the complete output.
    int64_t batch_dim_num = 1;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
        SUCCESS) {
      REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
                        attr_dim_index, op_desc->GetName().c_str());
      GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
             op_desc->GetName().c_str(), attr_dim_index);
      return FAILED;
    }
  } else {
    if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
      REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
      GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
      return FAILED;
    }
  }
  if ((tensor_size < 0) || (nopadding_size < 0)) {
    REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
                       "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
                       tensor_size, nopadding_size, op_desc->GetName().c_str());
    GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
           tensor_size, nopadding_size, op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
void AlignMemOffset(int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}
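// Worked example (illustrative, with MEM_ALIGN_SIZE == 512): mem_align_size = 1000 gives
// (1000 + 512 - 1) / 512 * 512 == 1024, i.e. the value is rounded up to the next 512-byte
// boundary; an already aligned value such as 1024 is left unchanged.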
bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  bool is_peer_output_continuous = false;
  // If GetBool fails, is_peer_output_continuous is false.
  (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  // Get the peer node's output count. If it is 1 (the peer node has only one output), the continuous input of
  // this node and the continuous output of the previous node are the same block, and we can support it.
  // If it is not 1, the two constraints may conflict, and we cannot support it.
  auto peer_output_size = peer_op_desc->GetOutputsSize();
  GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " requires continuous output. There may be a conflict between the two. " +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  bool is_peer_reference = false;
  // If GetBool fails, is_peer_reference is false.
  (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  GE_IF_BOOL_EXEC(is_peer_reference,
                  std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " is ref. There may be a conflict between the two.";
                  GELOGW("%s", warning.c_str());
                  return false;);
  return false;
}
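// Example of the rejected conflict (a sketch): if the peer op has ATTR_NAME_CONTINUOUS_OUTPUT set
// and two outputs, only one of which feeds this continuous-input node, the two layout constraints
// can contradict each other; with exactly one peer output both constraints describe the same
// contiguous block, so the case is accepted.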
/// op1 -> node -> op2
/// Returns true when node is ref-from-input and op1 or op2 reuses input memory for its output.
bool GraphMemoryAssigner::IsRefFromInputOpCascade(const NodePtr &node) {
  bool ref_from_input = false;
  int32_t reuse_in_index = -1;
  for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
    ref_from_input = GraphUtils::IsRefFromInput(out_anchor, reuse_in_index);
    if (ref_from_input) {
      GELOGD("IsRefFromInputOpCascade: cur node:%s:%d is ref", node->GetName().c_str(), reuse_in_index);
      break;
    }
  }
  for (const auto &in_anchor : node->GetAllInDataAnchors()) {
    const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
    if (ref_from_input && GraphUtils::IsRefFromInput(peer_out_anchor, reuse_in_index)) {
      GELOGD("IsRefFromInputOpCascade: in node[%s] is ref, reuse index is:%d",
             peer_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index);
      return true;
    }
  }
  for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
    const auto &peer_in_anchors = out_anchor->GetPeerInDataAnchors();
    for (const auto &peer_in_anchor : peer_in_anchors) {
      auto peer_in_node = peer_in_anchor->GetOwnerNode();
      GE_IF_BOOL_EXEC(peer_in_node == nullptr, continue);
      for (const auto &peer_in_node_out_anchor : peer_in_node->GetAllOutDataAnchors()) {
        if (ref_from_input && GraphUtils::IsRefFromInput(peer_in_node_out_anchor, reuse_in_index)) {
          GELOGD("IsRefFromInputOpCascade: out node[%s] is ref, reuse index is:%d",
                 peer_in_node_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index);
          return true;
        }
      }
    }
  }
  return false;
}
/// node:in0 (in0 reuses out0) -> peer_node:out0
/// Updates peer_node's 0th output offset with node's 0th output offset.
Status GraphMemoryAssigner::UpdateRefOpOffsetReverse(const NodePtr &node) {
  map<int32_t, int32_t> out2ins;
  GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node:%s",
                    node->GetName().c_str());
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  for (const auto &out2in : out2ins) {
    auto reuse_in_anchor = node->GetInDataAnchor(out2in.second);
    GE_CHECK_NOTNULL(reuse_in_anchor);
    auto peer_out_anchor = reuse_in_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_anchor);
    auto peer_node = peer_out_anchor->GetOwnerNode();
    GE_CHECK_NOTNULL(peer_node);
    auto peer_op_desc = peer_node->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> peer_output_list = peer_op_desc->GetOutputOffset();
    if ((peer_out_anchor->GetIdx() >= static_cast<int>(peer_output_list.size())) ||
        (out2in.first >= static_cast<int32_t>(output_list.size()))) {
      GELOGW("out of range, peer_out_anchor:%d, peer_output_list size:%zu, out2in:%d, output_list size:%zu",
             peer_out_anchor->GetIdx(), peer_output_list.size(), out2in.first, output_list.size());
      continue;
    }
    peer_output_list.at(peer_out_anchor->GetIdx()) = output_list.at(out2in.first);
    peer_op_desc->SetOutputOffset(peer_output_list);
    GELOGD("UpdateRefOpOffsetReverse: Node[%s] output[%d] is set from node[%s] output index[%d] offset[%ld]",
           peer_node->GetName().c_str(), peer_out_anchor->GetIdx(), node->GetName().c_str(), out2in.first,
           output_list.at(out2in.first));
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  // Stores nodes whose continuous input memory must be assigned in reverse topological order.
  std::vector<NodePtr> nodes_stack;
  std::map<NodePtr, uint32_t> node_2_continuous_type;
  // Traverse the nodes.
  for (auto &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    uint32_t continuous_type;
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      continuous_type = GetContinuousMemoryType(node->GetOpDesc());
      node_2_continuous_type.emplace(node, continuous_type);
    } else {
      continuous_type = iter->second;
    }
    // Assign continuous input memory.
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (IsRefFromInputOpCascade(node)) {
      nodes_stack.push_back(node);
      GELOGD("Ref: Push node:%s to stack", node->GetName().c_str());
    } else if (continuous_input) {
      if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
        GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
                          "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
      } else {
        nodes_stack.push_back(node);
        GELOGD("Continuous: Push node:%s to stack", node->GetName().c_str());
      }
    }
    // Assign continuous output memory.
    int64_t memory_type = RT_MEMORY_HBM;
    bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
    if (continuous_output) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
                        "[Get][MemType]fail for node:%s", node->GetName().c_str());
      ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "[Assign][Memory:Continuous:Output]fail for node:%s", node->GetName().c_str());
        return ret;
      }
    }
  }
  // Assign continuous input memory, in the reverse topological order stored above.
  while (!nodes_stack.empty()) {
    auto node = nodes_stack.back();
    nodes_stack.pop_back();
    auto iter = node_2_continuous_type.find(node);
    if (iter == node_2_continuous_type.end()) {
      REPORT_INNER_ERROR("E19999", "Get ContinuousType from node_2_continuous_type map failed for node:%s",
                         node->GetName().c_str());
      GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
    if (((iter->second & kTypeInput) != 0) || ((iter->second & kTypeInputNoPadding) != 0)) {
      GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
                        "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
    } else {
      GE_CHK_STATUS_RET(UpdateRefOpOffsetReverse(node),
                        "[Update][Memory:Reference:Output]fail for node:%s", node->GetName().c_str())
    }
  }
  for (auto pair : memory_offset_) {
    GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
    int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
  GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str());
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
                       "for node:%s, ", memory_type, node->GetName().c_str());
    GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
           memory_type, node->GetName().c_str());
    return FAILED;
  }
  // The head and tail of hcom continuous input need an extra MEM_ALIGN_SIZE (512-byte) margin.
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  continuous_mem_start = iter->second.mem_offset_;
  int64_t mem_offset = iter->second.mem_offset_;
  int64_t extra_memory_size = 0;
  bool is_continuous_input_allocated = false;
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  vector<int64_t> output_list_this = op_desc->GetOutputOffset();
  if (output_list_this.empty()) {
    REPORT_INNER_ERROR("E19999", "No output offset in node:%s, not expected", node->GetName().c_str());
    GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    int64_t real_size = 0;
    std::vector<int64_t> offsets_of_fusion = {};
    bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
    lx_fusion = lx_fusion && !offsets_of_fusion.empty();
    if (lx_fusion) {
      if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
        std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
            " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
            " is out of range:" + FmtToStr(offsets_of_fusion.size());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
      tensor_desc_size = nopadding_size;
    } else {
      if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
                        continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
        return FAILED;
      }
    }
    bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
      std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
          " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
          " is out of range:" + FmtToStr(output_list.size());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    // When continuous input has already been allocated, the first input marks the beginning offset.
    bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
    if (is_allocated_first_input) {
      std::map<int32_t, int32_t> out2ins;
      GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s",
                        node->GetName().c_str());
      // The output marks the beginning offset, so set the offset for the input; only this case is supported now.
      if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
        auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
        output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
        peer_op_desc->SetOutputOffset(output_list);
        GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld",
               node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second,
               peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset);
      } else {
        GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
               out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
      }
      // The first input marks the beginning offset.
      mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
      continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
    } else {
      // Set the offset for the input.
      output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
      peer_op_desc->SetOutputOffset(output_list);
    }
    int64_t align_size = tensor_desc_size;
    if (is_nopadding) {
      mem_offset += nopadding_size;
      extra_memory_size += (tensor_desc_size - nopadding_size);
      real_size = nopadding_size;
    } else {
      ge::AlignMemOffset(align_size);
      mem_offset += align_size;
      // The head and tail of hcom continuous input need an extra MEM_ALIGN_SIZE (512-byte) margin.
      extra_memory_size = MEM_ALIGN_SIZE;
      real_size = tensor_desc_size;
    }
    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
           peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
           output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
           is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
  mem_offset += extra_memory_size;
  ge::AlignMemOffset(mem_offset);
  continuous_mem_size = mem_offset - continuous_mem_start;
  if (is_continuous_input_allocated) {
    // Memory is not allocated here, so there is no need to add the 512-byte margin at the header.
    iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  } else {
    iter->second.mem_offset_ = mem_offset;
  }
  return SUCCESS;
}
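// Worked example (illustrative, padding case, MEM_ALIGN_SIZE == 512): for two 100-byte inputs,
// each input is aligned up to 512 and the tail adds extra_memory_size == 512, so
// continuous_mem_size == 512 + 512 + 512 == 1536 counted from continuous_mem_start; the 512-byte
// head margin was already applied to mem_offset_ before continuous_mem_start was taken.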
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  if (in_data_anchor_list.empty()) {
    REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expected", node->GetName().c_str());
    GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
    return FAILED;
  }
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerAnchor is null, not expected for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, not expected for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, judged invalid for node:%s",
                       peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
           peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
    return FAILED;
  }
  mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
                                                         uint32_t continuous_type) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr,
                  REPORT_INNER_ERROR("E19999", "OpDesc is null, not expected for node:%s",
                                     node->GetName().c_str());
                  GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str());
                  return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s",
                       out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
           out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
    return ge::FAILED;
  }
  int64_t mem_offset = 0;
  bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    // The output tensor memory must reuse the input tensor memory.
    if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
      return ge::FAILED;
    }
  } else {
    // Get the reference type of the node; the default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // If the output is of ref type and refers to the ref of an input, the output and the input share the same
    // name. When GE encounters a ref type, it finds the matching relationship according to the input and output
    // names and allocates the same memory address, e.g. HCOMBroadcast.
    if (is_ref) {
      GELOGI("Current node %s does not need continuous output assignment because it references input by name.",
             node->GetName().c_str());
      return SUCCESS;
    }
    mem_offset = output_list[0];
  }
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
                      tensor_desc_size, nopadding_size) != ge::SUCCESS) {
      return FAILED;
    }
    if (is_nopadding) {
      mem_offset += nopadding_size;
    } else {
      mem_offset += tensor_desc_size;
      ge::AlignMemOffset(mem_offset);
    }
    GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
           " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
           output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
           is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // Both maps are keyed by the dynamic-batch label (batch name).
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
           compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reassign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      if (atomic_mem_size != 0) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
                          "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed, "
             "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fails, is_atomic_node is false.
            (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fails, is_reference is false.
              (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
                                   "not supported now", peer_in_node_desc->GetName().c_str());
                GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
                       "not supported now", peer_in_node_desc->GetName().c_str());
                return ge::PARAM_INVALID;
              }
              std::string batch_label;
              (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetListInt fails, is_connecting_output is an empty vector.
              (void)ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
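// Illustrative topology (a sketch): an AtomicAddrClean node fans out over control edges to the
// atomic nodes it serves; atomic nodes marked with ATTR_NAME_NODE_CONNECT_OUTPUT are collected in
// connecting_output_atomic_nodes (grouped by batch label) because they are cleaned independently,
// while the remaining ones are grouped per clean node in normal_atomic_nodes_map.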
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  // Assign atomic node output memory
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Output:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
    return ret;
  }
  // Check and assign atomic node workspace memory
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node is false.
    (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign workspace memory for a single ordinary atomic node, excluding fusion nodes
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
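// For each atomic node connecting directly to NetOutput, assign output and workspace memory from the HBM
// offset, then record the clean range on its own atomic_addr_clean node via SetIndependentAtomicAttr.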
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node %s.",
             node->GetName().c_str());
      return FAILED;
    }
    // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
      GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
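// For every node marked with ATTR_NAME_REFERENCE, make each output that shares a name with an input reuse
// that input's memory: the output offset is copied from the peer output feeding the matching input.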
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr,
                    REPORT_INNER_ERROR("E19999", "out_op_desc is null.");
                    GELOGE(ge::FAILED, "[Check][Param] out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s",
                         out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
             out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
      return ge::FAILED;
    }
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
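// An atomic node must not take its input directly from a Constant, AippData or Variable node; such links
// are rejected here, presumably because the atomic clean would touch memory the assigner does not own.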
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      REPORT_INNER_ERROR("E19999", "Node(type:%s, name:%s) links to atomic node(name:%s), "
                         "this situation is not supported now",
                         peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      GELOGE(ge::FAILED, "[Check][Link]Node(type:%s, name:%s) links to atomic node(name:%s), "
             "this situation is not supported now",
             peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
      return false;
    }
  }
  return true;
}
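// Assign HBM memory for each output listed in ATOMIC_ATTR_OUTPUT_INDEX. Outputs whose atomic address was
// already assigned via a downstream cascade node are skipped; each newly assigned block is aligned to
// MEM_ALIGN_SIZE and its end offset is appended to mem_offset_end.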
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index is empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
        " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error =
          "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
          " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
             node->GetName().c_str(), output_index);
      return ge::FAILED;
    }
    // If an atomic address has already been assigned, skip it; there is no need to reassign.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed.");
    }
    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%u] "
           "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
           node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
           size, size, batch_label.c_str());
    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
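// An output counts as already assigned when any consumer carries ATOMIC_ATTR_INPUT_INDEX whose first
// element equals kAllInputAddrIsAtomic, i.e. all of that consumer's input addresses are atomic.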
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error =
        "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
        " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get the atomic input attr of the peer input node; if atomic_input_index[0] == -1, the atomic address
    /// has already been assigned
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
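// Place each atomic workspace of a single (non-fusion) node at the current HBM offset. The workspace_info
// map is keyed by node name and maps workspace index to workspace size.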
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index:" + FmtToStr(workspace_index) +
            " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
            op_desc->GetName().c_str();
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }
      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
          batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
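// Fusion variant: workspace_info is keyed by sub-node name, and the assigned offsets are not written back
// into the workspace vector but stored per sub-node in EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.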
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
                       "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM], "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%u] size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
          op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
          op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s",
                       EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
           EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
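// Final validation pass: every input, output and workspace offset must differ from kInvalidOffset, and
// Identity/ReadVariableOp outputs are re-synchronized to their symbol offset from the ref-mapping.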
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }
    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
    // Check reuse between input and output
    GE_CHK_STATUS_RET(CheckRefNodeOffset(node), "[Check][Offset]fail for node: %s", node->GetName().c_str());
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::CheckRefNodeOffset(const NodePtr &node) {
  GE_CHECK_NOTNULL(node);
  std::map<int32_t, int32_t> out2ins;
  GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  auto opdesc = node->GetOpDesc();
  GE_CHECK_NOTNULL(opdesc);
  auto output_list = opdesc->GetOutputOffset();
  auto input_list = opdesc->GetInputOffset();
  for (const auto &out2in : out2ins) {
    auto out_i = out2in.first;
    if (static_cast<size_t>(out_i) >= output_list.size()) {
      std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
          FmtToStr(output_list.size()) + "should be bigger than ref out index" + FmtToStr(out_i);
      GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
      return ge::FAILED;
    }
    auto in_i = out2in.second;
    if (static_cast<size_t>(in_i) >= input_list.size()) {
      std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset size" +
          FmtToStr(input_list.size()) + "should be bigger than ref input index" + FmtToStr(in_i);
      GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
      return ge::FAILED;
    }
    if (output_list[out_i] != input_list[in_i]) {
      std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset " + FmtToStr(input_list[in_i]) +
          "should be equal to output offset" + FmtToStr(output_list[out_i]) + "with ref in" +
          FmtToStr(in_i) + "to output" + FmtToStr(out_i);
      GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
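// Log the final offset of each memory type, then refresh every node's input offsets from its peers.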
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
                       compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
    GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
           "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  }
  for (auto pair : memory_offset_) {
    if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) {
      continue;
    }
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }
  // Subgraph Data Node, check for constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;  // Constant input.
  }
  // Memory allocated for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }
  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get Parent input offset failed, node is " + FmtToStr(node->GetName()) +
        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  std::map<int32_t, int32_t> out2ins;
  GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }
    // If the current node is not broadcast, the OutputOffset of the previous node is used to update the input_list
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      bool is_l1_type = false;
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "Node" + FmtToStr(tmp_op_desc->GetName()) +
              " input_size" + FmtToStr(input_size) + " differs from memory_type_size" +
              FmtToStr(mem_type_size) + " or from ori_input_offset_list_size" +
              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        GELOGD("Node[%s] input[%d] has origin offset[%ld]", tmp_op_desc->GetName().c_str(), anchor->GetIdx(),
               origin_input_list[valid_input_index]);
        // L1 keeps the original input_offset
        is_l1_type = (memory_type[valid_input_index] == RT_MEMORY_L1);
        if (is_l1_type) {
          input_offset = origin_input_list[valid_input_index];
        } else {
          // HBM input_offset = original input_offset + output_offset
          input_offset = origin_input_list[valid_input_index] + output_list.at(out_index);
        }
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }
      if (!is_l1_type) {
        // Update the ref output_offset when the input changes
        GE_CHK_STATUS_RET(UpdateRefOpOutputOffset(node, out2ins, anchor->GetIdx(), input_offset),
                          "[Update][RefOffset]fail for node: %s", node->GetName().c_str());
      }
      GELOGD("Node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", tmp_op_desc->GetName().c_str(),
             anchor->GetIdx(), peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::UpdateRefOpOutputOffset(const NodePtr &node, const std::map<int32_t, int32_t> &out2ins,
                                                        const int ref_in, const int64_t input_offset) const {
  auto opdesc = node->GetOpDesc();
  GE_CHECK_NOTNULL(opdesc);
  for (const auto &out2in : out2ins) {
    auto out_i = out2in.first;
    auto in_i = out2in.second;
    if (in_i == ref_in) {
      auto origin_output_list = opdesc->GetOutputOffset();
      if (static_cast<size_t>(out_i) >= origin_output_list.size()) {
        std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
            FmtToStr(origin_output_list.size()) + "should be bigger than ref out index" + FmtToStr(out_i);
        GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
        return ge::FAILED;
      }
      origin_output_list[out_i] = input_offset;
      opdesc->SetOutputOffset(origin_output_list);
      GELOGI("Node[%s] output[%d] is updated from reuse input index[%d] to offset[%ld]", opdesc->GetName().c_str(),
             out_i, ref_in, input_offset);
    }
  }
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is broadcast and the preceding node is variable, the InputOffset has already been
      // set in AssignVarAttr2Nodes, so the broadcast node's own InputOffset is taken to update the input_list.
      // Otherwise, the OutputOffset of the previous node is used to update the input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
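// Derive per-block start offsets and sizes from a start offset plus a list of end offsets, then push them
// onto the upstream ATOMICADDRCLEAN node. A worked example of the arithmetic below (illustrative values):
// atomic_mem_start = 0 with mem_offset_end = {512, 1024} yields memory_offset_start = {0, 512} and
// memory_offset_size = {512, 512}.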
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  // Parse the offset and size vectors: memory_offset_start[i + 1] is the end offset of block i,
  // so the size of block i is memory_offset_start[i + 1] - memory_offset_start[i].
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
          GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
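// Record the ranges to be zeroed on the atomic-clean node: start offsets and sizes are appended to its
// workspace / workspace-bytes vectors and mirrored into the ATTR_NAME_AUTOMIC_ADD_START and
// ATTR_NAME_AUTOMIC_ADD_MEM_SIZE list attributes.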
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector is empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
                                        ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
                            ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
                     return FAILED);
    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();
    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]"
           " memtype[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
           atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
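// Round the current offset of the given memory type up to the next multiple of mem_align_size using
// (offset + align - 1) / align * align. For example (illustrative values), offset 100 with an alignment of
// 512 rounds up to 512, while an already aligned offset is left unchanged.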
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset map doesn't have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, the memory attributes of nodes are the same.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
                        "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
      break;
    }
    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
                        "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
      break;
    }
  }
  return SUCCESS;
}
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }
  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
        " of mem type list is not equal to the size of in data anchor" +
        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
    return FAILED;
  }
  // It is continuous memory and the memory type is the same, so use the first memory type.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.empty()) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the type of the input memory is inconsistent. They are " +
          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Assign memory of max batch nodes that have the same batch label.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}
ge::Status GraphMemoryAssigner::TryGetNodeRefIndexes(const NodePtr &node, map<int32_t, int32_t> &out2ins) const {
  // Data and NetOutput need no check, because only Data's output or NetOutput's input is used
  if (node->GetType() == DATA || node->GetType() == NETOUTPUT) {
    return ge::SUCCESS;
  }
  for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    int32_t reuse_in_index = -1;
    // No-padding means output[0] reuses input[0]; for historical reasons, other output indexes also
    // return true for memory assignment in block_mem_assigner
    if (GraphUtils::IsNoPaddingRefFromInput(out_data_anchor, reuse_in_index)) {
      out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      return ge::SUCCESS;
    }
    bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
    if (reuse_input_flag) {
      if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
        out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
      } else {
        REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
                           "please check attr reuse_input",
                           reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
               "please check attr reuse_input",
               reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
        return FAILED;
      }
    }
  }
  return ge::SUCCESS;
}
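// Decide whether a continuous-input node can be processed immediately: it can unless one of its input or
// output data nodes itself requires continuous input memory (Variable inputs are always processed directly).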
bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
    const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
  for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
    if (in_node->GetType() == VARIABLE) {
      GELOGI("Node %s's precursor node %s is a variable, do not store.", input_continuous_node->GetName().c_str(),
             in_node->GetName().c_str());
      return true;
    }
    auto iter = node_2_continuous_type.find(in_node);
    // The in-node is earlier in topological order, so the lookup cannot fail here
    auto continuous_type = iter->second;
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] of %s because its precursor node %s needs continuous input memory",
             input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
      return false;
    }
  }
  for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
    auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
    node_2_continuous_type.emplace(out_node, continuous_type);
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    if (continuous_input) {
      GELOGI("[Store][Node] of %s because its successor node %s needs continuous input memory",
             input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
      return false;
    }
  }
  return true;
}
ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
                                                                             uint32_t continuous_type,
                                                                             bool reverse_refresh) {
  int64_t mem_clean_start = 0;
  int64_t mem_clean_size = 0;
  int64_t memory_type = RT_MEMORY_HBM;
  GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
                    "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
  auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
                                         continuous_type, reverse_refresh);
  if (ret != ge::SUCCESS) {
    GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
    return ret;
  }
  // Clean up the atomic address, e.g., for an hcom node
  vector<int32_t> input_indexes;
  // If GetListInt fails, input_indexes is empty.
  (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
    // Check whether there is an atomic conflict between the current node and the peer out node
    if (!CheckInputIsSupportAtomic(input_continuous_node)) {
      return ge::FAILED;
    }
    const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
    GE_CHECK_NOTNULL(in_control_anchor);
    for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      GE_CHECK_NOTNULL(peer_out_control_anchor);
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
        ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
        if (ret != SUCCESS) {
          GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
          return ret;
        }
      }
    }
  }
  return ge::SUCCESS;
}
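// Buffer pool assignment runs only on known-shape root graphs where at least one node carries both
// ATTR_NAME_BUFFER_POOL_ID and ATTR_NAME_BUFFER_POOL_SIZE; the resulting offset is written back into
// memory_offset_ for the pool's memory type.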
Status GraphMemoryAssigner::AssignBufferPoolMemory() {
  auto is_buffer_pool_mem_enable = [] (const ComputeGraphPtr &graph) -> bool {
    for (NodePtr &node : graph->GetAllNodes()) {
      auto op_desc = node->GetOpDesc();
      if (op_desc == nullptr) {
        continue;
      }
      bool has_attrs = op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_ID) && op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_SIZE);
      if (has_attrs) {
        return true;
      }
    }
    return false;
  };
  auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
  GE_CHECK_NOTNULL(root_graph);
  if (root_graph->GetGraphUnknownFlag()) {
    GELOGI("[Check][Enable]Unknown root graph does not support buffer pool memory, graph:%s.",
           compute_graph_->GetName().c_str());
    return SUCCESS;
  }
  if (!is_buffer_pool_mem_enable(compute_graph_)) {
    GELOGD("[Check][Enable]Buffer pool memory is not enabled, graph:%s.", compute_graph_->GetName().c_str());
    return SUCCESS;
  }
  map<int64_t, size_t> mem_type_to_offset;
  for (const auto &pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  }
  BufferPoolMemAssigner buffer_pool_mem_assigner(compute_graph_, mem_type_to_offset);
  Status status = buffer_pool_mem_assigner.Assign();
  if (status != SUCCESS) {
    GELOGE(status, "[Assign][BufferPoolMem]Graph:%s.", compute_graph_->GetName().c_str());
    REPORT_INNER_ERROR("E19999", "Failed to assign buffer pool memory, graph:%s.", compute_graph_->GetName().c_str());
    return status;
  }
  int64_t mem_type = buffer_pool_mem_assigner.GetMemType();
  auto iter = memory_offset_.find(mem_type);
  if (iter == memory_offset_.end()) {
    GELOGE(FAILED, "[Check][MemType]Memory type is not supported, graph:%s, mem type:%ld.",
           compute_graph_->GetName().c_str(), mem_type);
    REPORT_INNER_ERROR("E19999", "Memory type is not supported, graph:%s, mem type:%ld.",
                       compute_graph_->GetName().c_str(), mem_type);
    return FAILED;
  }
  iter->second.mem_offset_ = buffer_pool_mem_assigner.GetMemOffset();
  GELOGI("[Assign][BufferPoolMem]Assign buffer pool memory successfully, graph:%s, mem type:%ld, mem offset:%zu.",
         compute_graph_->GetName().c_str(), mem_type, buffer_pool_mem_assigner.GetMemOffset());
  return SUCCESS;
}
// Return false if the producer and its consumers are in the same stream, or if the consumers share one
// stream when the producer is not assigned a stream; otherwise return true.
bool GraphMemoryAssigner::IsOutputVisitedByMultiStream(const NodePtr &peer_out_node, int64_t out_anchor_index) {
  GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, return true);
  int64_t unique_stream_id = peer_out_node->GetOpDesc()->GetStreamId();
  GE_IF_BOOL_EXEC(peer_out_node->GetOutDataAnchor(out_anchor_index) == nullptr, return true);
  for (const auto &in_data_anchor : peer_out_node->GetOutDataAnchor(out_anchor_index)->GetPeerInDataAnchors()) {
    auto node = in_data_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, continue);
    if (node->GetOpDesc()->GetStreamId() == kInvalidStream) {
      continue;
    }
    if (unique_stream_id == kInvalidStream) {  // peer_out_node does not belong to any stream
      unique_stream_id = node->GetOpDesc()->GetStreamId();
      continue;
    }
    if (node->GetOpDesc()->GetStreamId() != unique_stream_id) {
      return true;
    }
  }
  return false;
}
void GraphMemoryAssigner::UpdatePrevNodeInputDesc(const NodePtr &prev_node,
                                                  const vector<int64_t> &prev_node_input_index_vec,
                                                  int64_t distance) {
  GE_IF_BOOL_EXEC(prev_node == nullptr, return);
  auto prev_node_op_desc = prev_node->GetOpDesc();
  GE_IF_BOOL_EXEC(prev_node_op_desc == nullptr, return);
  for (const auto prev_node_input_index : prev_node_input_index_vec) {
    auto input_desc = prev_node_op_desc->GetInputDesc(prev_node_input_index);
    vector<int64_t> prev_next_distances;
    if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
      GELOGW("Get [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
             prev_node_op_desc->GetName().c_str(), prev_node_input_index);
      continue;
    }
    if (prev_next_distances.size() == kPrevNextDistanceNum) {
      prev_next_distances[1] = distance;
    } else {
      GELOGW("Size of prev_next_distances is not %d.", kPrevNextDistanceNum);
      continue;
    }
    if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
      GELOGW("Set [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
             prev_node_op_desc->GetName().c_str(), prev_node_input_index);
      continue;
    }
    if (prev_node_op_desc->UpdateInputDesc(prev_node_input_index, input_desc) != GRAPH_SUCCESS) {
      GELOGW("Update [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
             prev_node_op_desc->GetName().c_str(), prev_node_input_index);
      continue;
    }
    GELOGD("Set the next distance[%ld] to node[%s], input index[%ld]",
           distance, prev_node->GetName().c_str(), prev_node_input_index);
  }
  return;
}
void GraphMemoryAssigner::UpdateCurNodeInputDesc(const NodePtr &cur_node,
                                                 int64_t cur_node_input_index,
                                                 int64_t distance) {
  GE_IF_BOOL_EXEC(cur_node == nullptr, return);
  GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, return);
  auto input_desc = cur_node->GetOpDesc()->GetInputDesc(cur_node_input_index);
  vector<int64_t> prev_next_distances{distance, -1};
  if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
    GELOGW("Set [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
           cur_node->GetOpDesc()->GetName().c_str(), cur_node_input_index);
    return;
  }
  if (cur_node->GetOpDesc()->UpdateInputDesc(cur_node_input_index, input_desc) != GRAPH_SUCCESS) {
    GELOGW("Update [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
           cur_node->GetOpDesc()->GetName().c_str(), cur_node_input_index);
    return;
  }
  GELOGD("Set the prev distance[%ld] to node[%s], input index[%ld]",
         distance, cur_node->GetName().c_str(), cur_node_input_index);
  return;
}
void GraphMemoryAssigner::CheckNeedCalcDistAndUpdateVisitInfo(
    const NodePtr &peer_out_node,
    const OutDataAnchorPtr &peer_out_anchor,
    size_t matched_mem_offset,
    map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
    bool &is_need_calc_distance) {
  auto iter = mem_block_visit_info.find(matched_mem_offset);
  // No visit info found: peer_out_node must be the producer and this data is being visited for the first time.
  if (iter == mem_block_visit_info.end()) {
    if (IsOutputVisitedByMultiStream(peer_out_node, peer_out_anchor->GetIdx())) {
      vector<int64_t> temp;
      mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(nullptr, temp)));
      is_need_calc_distance = false;
      return;
    } else {
      vector<int64_t> temp = {-1};
      // The producer's prev_node_index is set to -1 by default
      mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(peer_out_node, temp)));
      is_need_calc_distance = true;
      return;
    }
  } else {
    if (mem_block_visit_info[matched_mem_offset].first == nullptr) {
      // Multi-stream visit, no need to calculate
      is_need_calc_distance = false;
      return;
    }
    if (peer_out_node->GetOpDesc()->GetStreamId() !=
        mem_block_visit_info[matched_mem_offset].first->GetOpDesc()->GetStreamId()) {
      // The current node and peer_out_node are not in the same stream, no need to calculate
      is_need_calc_distance = false;
      return;
    }
  }
  is_need_calc_distance = true;
  return;
}
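// The distance between two nodes on one stream is the count of nodes scheduled between them:
// index(cur) - index(prev) - 1. For example (illustrative values), if the producer is the 3rd node in its
// stream and the consumer is the 7th, the recorded distance is 3.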
// Calculate the distance, update the visit info, and update the input descs of the prev node and the cur node.
void GraphMemoryAssigner::CalcDistanceAndUpdateDesc(const map<string, int64_t> &node_index_in_stream,
                                                    const InDataAnchorPtr &in_data_anchor,
                                                    size_t matched_mem_offset,
                                                    NodePtr &node,
                                                    map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
                                                    bool &is_need_skip) {
  int64_t distance = -1;
  auto prev_node = mem_block_visit_info[matched_mem_offset].first;
  auto prev_node_input_index_vec = mem_block_visit_info[matched_mem_offset].second;
  GE_IF_BOOL_EXEC(prev_node == nullptr, is_need_skip = true; return);
  if (prev_node_input_index_vec.size() == 1 && prev_node_input_index_vec[0] == -1) {
    // prev_node is the producer and the data has just been produced (not yet visited by any other node).
    GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
    if (prev_node->GetOpDesc()->GetStreamId() == -1) {  // producer has not been assigned a stream
      distance = 0;
    } else {
      auto iter = node_index_in_stream.find(prev_node->GetName());
      if (iter == node_index_in_stream.end()) {
        distance = 0;
      } else {
        distance = node_index_in_stream.at(node->GetName()) - iter->second - 1;
      }
    }
    mem_block_visit_info[matched_mem_offset].first = node;
    mem_block_visit_info[matched_mem_offset].second.clear();
    mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
  } else {  // the data was just visited by another consumer
    if (prev_node_input_index_vec.empty()) {
      GELOGW("Missing prev node[%s] input index.", prev_node->GetName().c_str());
      is_need_skip = true;
      return;
    }
    if (prev_node == node) {  // scene: multiple anchors of one node access the same data
      vector<int64_t> prev_next_distances;
      GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
      auto input_desc = prev_node->GetOpDesc()->GetInputDesc(prev_node_input_index_vec[0]);
      if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
        GELOGW("Get ATTR_NAME_DATA_VISIT_DISTANCE failed.");
        is_need_skip = true;
        return;
      }
      if (prev_next_distances.size() != kPrevNextDistanceNum) {
        GELOGW("Size of prev_next_distances is not %d.", kPrevNextDistanceNum);
        is_need_skip = true;
        return;
      } else {
        distance = prev_next_distances[0];  // reuse the same prev_distance as the previous anchor
      }
      mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
    } else {
      distance = node_index_in_stream.at(node->GetName()) - node_index_in_stream.at(prev_node->GetName()) - 1;
      UpdatePrevNodeInputDesc(prev_node, prev_node_input_index_vec, distance);
      mem_block_visit_info[matched_mem_offset].first = node;
      mem_block_visit_info[matched_mem_offset].second.clear();
      mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
    }
  }
  UpdateCurNodeInputDesc(node, in_data_anchor->GetIdx(), distance);
}
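// When the current input is marked as the end of the input memory's lifecycle, drop the visit info of
// the matched block, so that a later reuse of the same offset is treated as a fresh visit.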
void GraphMemoryAssigner::DeleteVisitInfoWhenLifecycleEnded(
    const NodePtr &node,
    const InDataAnchorPtr &in_data_anchor,
    size_t matched_mem_offset,
    map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info) {
  GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, return);
  auto input_desc = node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx());
  bool is_end_of_inputmem_lifecycle = false;
  // If is_end_of_inputmem_lifecycle is true, the current node is the last consumer of this data, so the
  // visit info of the block must be deleted in case the mem block is reused and visited again.
  if (ge::AttrUtils::GetBool(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, is_end_of_inputmem_lifecycle) &&
      is_end_of_inputmem_lifecycle) {
    GELOGD("ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE is true, node name is [%s], in_data_anchor index is [%d]",
           node->GetName().c_str(),
           in_data_anchor->GetIdx());
    auto iter = mem_block_visit_info.find(matched_mem_offset);
    if (iter != mem_block_visit_info.end()) {
      mem_block_visit_info.erase(iter);
    }
  }
}
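// For each input of the node: locate the memory block written by the peer output anchor, and, if the
// visit qualifies, calculate the visit distance, update the visit info and the affected input descs, and
// finally release the visit info once the input memory's lifecycle ends.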
void GraphMemoryAssigner::MarkNodeDistanceAttr(const ComputeGraphPtr &compute_graph,
                                               NodePtr &node,
                                               map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
                                               const map<string, int64_t> &node_index_in_stream) {
  // Check node before dereferencing it in the log below.
  GE_IF_BOOL_EXEC(node == nullptr, return);
  GELOGD("Begin to mark node distance attr, node name is [%s]", node->GetName().c_str());
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
    auto peer_out_node = peer_out_anchor->GetOwnerNode();
    GE_IF_BOOL_EXEC(peer_out_node == nullptr, continue);
    GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, continue);
    auto matched_mem_offset = peer_out_node->GetOpDesc()->GetOutputOffset().at(peer_out_anchor->GetIdx());
    bool is_need_calc_distance = false;
    CheckNeedCalcDistAndUpdateVisitInfo(peer_out_node, peer_out_anchor, matched_mem_offset,
                                        mem_block_visit_info, is_need_calc_distance);
    if (!is_need_calc_distance) {
      continue;
    }
    bool is_need_skip = false;
    CalcDistanceAndUpdateDesc(node_index_in_stream, in_data_anchor, matched_mem_offset, node,
                              mem_block_visit_info, is_need_skip);
    if (is_need_skip) {
      continue;
    }
    DeleteVisitInfoWhenLifecycleEnded(node, in_data_anchor, matched_mem_offset, mem_block_visit_info);
  }
}
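// Entry point: walk all nodes of the graph, maintain each node's topological index within its own
// stream, and mark ATTR_NAME_DATA_VISIT_DISTANCE on qualifying inputs. The attribute is a list expected
// to hold kPrevNextDistanceNum entries, with the prev distance at index 0. ge_local ops are skipped.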
void GraphMemoryAssigner::MarkDistanceAttr() {
  // key: mem offset of the visited memory block; value: the last node that visited it and its input indices
  map<size_t, pair<NodePtr, vector<int64_t>>> mem_block_visit_info;
  // key: node name; value: topological order of the node within its own stream (ge_local ops excluded)
  map<string, int64_t> node_index_in_stream;
  // key: stream id; value: current number of nodes counted in that stream
  map<int64_t, int64_t> stream_nodes_num;
  for (auto &node : compute_graph_->GetAllNodes()) {
    auto node_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(node_op_desc == nullptr, return);
    int64_t stream_id = node_op_desc->GetStreamId();
    if (node_op_desc->GetOpKernelLibName() != kEngineNameGeLocal) {
      if (stream_nodes_num.find(stream_id) == stream_nodes_num.end()) {
        stream_nodes_num.insert(std::make_pair(stream_id, 1));
      } else {
        ++stream_nodes_num[stream_id];
      }
      node_index_in_stream.insert(std::make_pair(node->GetName(), stream_nodes_num[stream_id] - 1));
      MarkNodeDistanceAttr(compute_graph_, node, mem_block_visit_info, node_index_in_stream);
    } else {
      GELOGD("node[%s] is a ge_local op, no need to calculate distance.", node->GetName().c_str());
    }
  }
}
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware, acting as a bridge: GE takes the graph delivered by ME as input, performs a series of deep graph-optimization passes, and outputs a graph that runs efficiently on the underlying hardware. GE applies optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its computing power. During model training/inference, GE is invoked automatically and is transparent to the user. GE mainly consists of two parts, GE API and GE Core; the detailed architecture diagram is shown below.