You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

graph_mem_assigner.cc 110 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/build/memory/graph_mem_assigner.h"
  17. #include <cstring>
  18. #include <set>
  19. #include "common/math/math_util.h"
  20. #include "common/util/error_manager/error_manager.h"
  21. #include "framework/common/debug/ge_log.h"
  22. #include "framework/common/debug/log.h"
  23. #include "graph/build/memory/hybrid_mem_assigner.h"
  24. #include "graph/build/memory/var_mem_assign_util.h"
  25. #include "graph/build/memory/block_mem_assigner.h"
  26. #include "graph/common/omg_util.h"
  27. #include "graph/debug/ge_attr_define.h"
  28. #include "graph/ge_attr_value.h"
  29. #include "graph/manager/graph_var_manager.h"
  30. #include "graph/utils/tensor_utils.h"
  31. #include "graph/utils/type_utils.h"
  32. #include "graph/build/memory/buffer_pool_mem_assigner.h"
  33. namespace {
  // File-local sentinel / selector values. Their consumers are not all visible in this part of the file.
  constexpr int kAllInputAddrIsAtomic = -1;
  constexpr int kVirtualInputNodeMemoryReuse = 0;
  constexpr int kVirtualOutputNodeMemoryReuse = 1;
  constexpr int kPrevNextDistanceNum = 2;
  constexpr int64_t kInvalidStream = -1;
  constexpr const char *kEngineNameGeLocal = "DNN_VM_GE_LOCAL_OP_STORE";
  // Kinds of continuous memory. Every enumerator owns its own bit and no value may repeat,
  // so several kinds can be OR-ed together into a single mask.
  enum ContinuousType {
    kTypeInput = 1 << 0,
    kTypeInputNoPadding = 1 << 1,
    kTypeOutput = 1 << 2,
    kTypeOutputNoPadding = 1 << 3
  };
  42. int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
  43. const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
  44. const ge::NodePtr &node, const uint32_t i) {
  45. ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  46. auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  47. if (iter1 == anchor_to_symbol.end()) {
  48. return ge::kInvalidOffset;
  49. }
  50. auto out_symbol = iter1->second;
  51. auto iter2 = symbol_to_anchors.find(out_symbol);
  52. if (iter2 == symbol_to_anchors.end()) {
  53. return ge::kInvalidOffset;
  54. }
  55. for (const auto &node_index_io : iter2->second) {
  56. if (node_index_io.value_ == out_symbol) {
  57. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  58. vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
  59. if (node_index_io.index_ >= symbol_output_list.size()) {
  60. return ge::kInvalidOffset;
  61. }
  62. GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
  63. output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
  64. return symbol_output_list.at(node_index_io.index_);
  65. }
  66. }
  67. return ge::kInvalidOffset;
  68. }
  69. bool isVariableMemoryNode(const ge::NodePtr &node) {
  70. return (node->GetType() == ge::VARIABLE) || (node->GetType() == ge::CONSTANTOP);
  71. }
  72. } // namespace
  73. namespace ge {
  74. Status VariableMemoryAssigner::Assign() {
  75. Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  76. if (result != ge::SUCCESS) {
  77. return result;
  78. }
  79. result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  80. if (result != ge::SUCCESS) {
  81. return result;
  82. }
  83. return ge::SUCCESS;
  84. }
  85. Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  86. Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  87. if (result != ge::SUCCESS) {
  88. return result;
  89. }
  90. return ge::SUCCESS;
  91. }
  92. Status VariableMemoryAssigner::AssignMemory2HasRefAttrNode() {
  93. Status result = ge::VarMemAssignUtil::AssignMemory2HasRefAttrNode(compute_graph_);
  94. if (result != ge::SUCCESS) {
  95. return result;
  96. }
  97. return ge::SUCCESS;
  98. }
  99. Status GraphMemoryAssigner::AssignMemory() {
  100. ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  101. if (mem_assigner->Assign() != ge::SUCCESS) {
  102. GELOGE(ge::FAILED, "[Assign][GraphMem]graph_id:%u, graph_name:%s",
  103. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  104. return ge::FAILED;
  105. }
  106. for (auto pair : mem_assigner->GetMemOffsets()) {
  107. MemoryOffset offset(pair.first, pair.second);
  108. memory_offset_.emplace(pair.first, offset);
  109. }
  110. // base memtype offset must be exist
  111. auto it = mem_assigner->GetMemOffsets().find(RT_MEMORY_HBM);
  112. if (it == mem_assigner->GetMemOffsets().end()) {
  113. MemoryOffset memory_offset(RT_MEMORY_HBM, 0);
  114. memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  115. }
  116. it = mem_assigner->GetMemOffsets().find(RT_MEMORY_P2P_DDR);
  117. if (it == mem_assigner->GetMemOffsets().end()) {
  118. MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, 0);
  119. memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  120. }
  121. auto session_id = compute_graph_->GetSessionID();
  122. int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  123. auto variable_assigner =
  124. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  125. if (variable_assigner == nullptr) {
  126. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  127. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  128. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  129. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  130. return ge::FAILED;
  131. }
  132. if (variable_assigner->Assign() != ge::SUCCESS) {
  133. return ge::FAILED;
  134. }
  135. int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  136. GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  137. mem_assigner_ = std::move(mem_assigner);
  138. return ge::SUCCESS;
  139. }
  140. ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  141. auto variable_assigner =
  142. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  143. if (variable_assigner == nullptr) {
  144. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  145. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  146. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  147. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  148. return ge::FAILED;
  149. }
  150. if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
  151. return ge::FAILED;
  152. }
  153. return ge::SUCCESS;
  154. }
  155. ge::Status GraphMemoryAssigner::AssignMemory2HasRefAttrNode() {
  156. auto variable_assigner =
  157. std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  158. if (variable_assigner == nullptr) {
  159. GELOGE(ge::FAILED, "[New][Object:VariableMemoryAssigner]graph_id:%u, graph_name:%s",
  160. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  161. REPORT_CALL_ERROR("E19999", "New Object:VariableMemoryAssigner failed, "
  162. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  163. }
  164. if (variable_assigner->AssignMemory2HasRefAttrNode() != ge::SUCCESS) {
  165. return ge::FAILED;
  166. }
  167. return ge::SUCCESS;
  168. }
  169. ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
  170. int64_t dim_index, int64_t &output_mem_size,
  171. int64_t &batch_dim_num, int64_t &out_size) {
  172. graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  173. if (graph_status != GRAPH_SUCCESS) {
  174. GELOGE(FAILED, "[Get][TensorSize]");
  175. REPORT_CALL_ERROR("E19999", "Get tensor size failed");
  176. return FAILED;
  177. }
  178. GeShape output_shape = output_desc->GetShape();
  179. std::vector<int64_t> output_dims = output_shape.GetDims();
  180. if (dim_index >= static_cast<int64_t>(output_dims.size())) {
  181. REPORT_INNER_ERROR("E19999", "Inner param dim_index value:%ld invalid, bigger than dim size:%lu in shape:%s",
  182. dim_index, output_dims.size(), output_shape.ToString().c_str());
  183. GELOGE(FAILED, "[Check][Param:dim_index]value:%ld invalid, bigger than dim size:%lu in shape:%s",
  184. dim_index, output_dims.size(), output_shape.ToString().c_str());
  185. return FAILED;
  186. }
  187. for (int64_t index = 0; index < dim_index; index++) {
  188. FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
  189. batch_dim_num *= output_dims[index];
  190. output_dims[index] = 1;
  191. }
  192. output_shape = GeShape(output_dims);
  193. Format out_format = output_desc->GetFormat();
  194. DataType data_type = output_desc->GetDataType();
  195. graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  196. if (graph_status != GRAPH_SUCCESS) {
  197. GELOGE(graph_status, "[Calc][TensorSize]");
  198. return FAILED;
  199. }
  200. if (output_mem_size < 0) {
  201. REPORT_INNER_ERROR("E19999", "After calculating, tensor memory size:%ld invalid, less than 0. "
  202. "shape:%s, format:%s, dtype:%s, maybe has dynamic shape",
  203. output_mem_size,
  204. output_shape.ToString().c_str(),
  205. TypeUtils::FormatToSerialString(out_format).c_str(),
  206. TypeUtils::DataTypeToSerialString(data_type).c_str());
  207. GELOGE(FAILED, "[Check][TensorSize]value:%ld invalid after calc, less than 0. shape:%s, format:%s, dtype:%s, "
  208. "maybe has dynamic shape",
  209. output_mem_size,
  210. output_shape.ToString().c_str(),
  211. TypeUtils::FormatToSerialString(out_format).c_str(),
  212. TypeUtils::DataTypeToSerialString(data_type).c_str());
  213. return FAILED;
  214. }
  215. return SUCCESS;
  216. }
  217. Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<uint64_t, size_t> &mem_type_to_offset) {
  218. if (memory_offset_.empty()) {
  219. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
  220. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  221. GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
  222. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  223. return ge::FAILED;
  224. }
  225. GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph),
  226. "[ReAssign][ContinuousMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  227. GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph),
  228. "[ReAssign][AtomicMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  229. GE_CHK_STATUS_RET(AssignBufferPoolMemory(),
  230. "[Assign][BufferPoolMemory] Failed! graph:%s", compute_graph_->GetName().c_str());
  231. size_t total_mem_offset = 0;
  232. for (auto pair : memory_offset_) {
  233. mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  234. total_mem_offset += pair.second.mem_offset_;
  235. }
  236. auto session_id = compute_graph_->GetSessionID();
  237. if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
  238. GELOGE(ge::FAILED, "[Check][TotalMemOffset] %zu is greater than memory manager malloc max size %zu, "
  239. "graph_id:%u, graph_name:%s, reduce your batchsize or scale your model may solve problem",
  240. total_mem_offset, VarManager::Instance(session_id)->GetGraphMemoryMaxSize(),
  241. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  242. for (auto iter : mem_type_to_offset) {
  243. GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
  244. iter.second, iter.first);
  245. }
  246. REPORT_INPUT_ERROR(
  247. "E19022", std::vector<std::string>({"size", "item", "maxsize"}),
  248. std::vector<std::string>({std::to_string(total_mem_offset), "featuremap",
  249. std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())}));
  250. return ge::FAILED;
  251. }
  252. return SUCCESS;
  253. }
  254. Status GraphMemoryAssigner::AssignZeroCopyMemory(map<uint64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  255. BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  256. if (priority_assigner == nullptr) {
  257. REPORT_INNER_ERROR("E19999", "InnerData priority_assigner nullptr, not expected, graph_id:%u, graph_name:%s",
  258. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  259. GELOGE(FAILED, "[Check][InnerData:priority_assigner]nullptr is invalid, "
  260. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  261. return ge::FAILED;
  262. }
  263. size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  264. // set offset for zero copy block
  265. for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
  266. if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
  267. continue;
  268. }
  269. memory_block->Resize();
  270. memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
  271. mem_offset[RT_MEMORY_HBM] += memory_block->Size();
  272. memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  273. }
  274. // set offset for zero copy nodes
  275. priority_assigner->SetOpMemOffset(true);
  276. zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  277. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  278. if (iter == memory_offset_.end()) {
  279. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  280. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  281. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  282. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  283. return FAILED;
  284. }
  285. iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  286. GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
  287. zero_mem_copy_size);
  288. return SUCCESS;
  289. }
  290. uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  291. if (op_desc == nullptr) {
  292. return 0;
  293. };
  294. bool is_continuous = false;
  295. uint32_t continuous_type = 0;
  296. // If GetBool fail, is_continuous is false.
  297. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  298. if (is_continuous) {
  299. continuous_type |= kTypeInput;
  300. } else {
  301. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
  302. if (is_continuous) {
  303. bool attr_reuse = false;
  304. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  305. if (attr_reuse) {
  306. continuous_type |= kTypeInputNoPadding;
  307. }
  308. }
  309. }
  310. is_continuous = false;
  311. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  312. if (is_continuous) {
  313. continuous_type |= kTypeOutput;
  314. } else {
  315. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
  316. if (is_continuous) {
  317. bool attr_reuse = false;
  318. (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
  319. if (attr_reuse) {
  320. continuous_type |= kTypeOutputNoPadding;
  321. }
  322. }
  323. }
  324. if (continuous_type != 0) {
  325. GELOGI("[Get][MemType:Continuous]Current node %s, value is %d", op_desc->GetName().c_str(), continuous_type);
  326. }
  327. return continuous_type;
  328. }
  329. Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
  330. int64_t &tensor_size, int64_t &nopadding_size) {
  331. if ((op_desc == nullptr) || (output_desc == nullptr)) {
  332. REPORT_INNER_ERROR("E19999", "InnerData param op_desc or output_desc is nullptr, not expected");
  333. GELOGE(FAILED, "[Check][Param]op_desc or output_desc is nullptr");
  334. }
  335. tensor_size = 0;
  336. nopadding_size = 0;
  337. bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  338. if (is_nopadding) {
  339. int64_t attr_dim_index;
  340. bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  341. if (!get_attr_dim_flag) {
  342. REPORT_INNER_ERROR("E19999", "Get Attr:%s failed, op_name:%s",
  343. ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
  344. GELOGE(FAILED, "[Get][Attr:%s]fail for op_name:%s",
  345. ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX.c_str(), op_desc->GetName().c_str());
  346. return FAILED;
  347. }
  348. // Calculate tensor real size of each piece of data and out size of complete data
  349. int64_t batch_dim_num = 1;
  350. if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
  351. SUCCESS) {
  352. REPORT_CALL_ERROR("E19999", "CalculateTensorRealSizeAndOutSize failed, attr_dim_index:%ld, op_name:%s",
  353. attr_dim_index, op_desc->GetName().c_str());
  354. GELOGE(FAILED, "[Calculate][NopaddingSize]failed for node %s, attr_dim_index:%ld",
  355. op_desc->GetName().c_str(), attr_dim_index);
  356. return FAILED;
  357. }
  358. } else {
  359. if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
  360. REPORT_INNER_ERROR("E19999", "Get Tensor Size failed, op_name:%s", op_desc->GetName().c_str());
  361. GELOGE(FAILED, "[Get][TensorSize]failed in padding case, op_name:%s", op_desc->GetName().c_str());
  362. return FAILED;
  363. }
  364. }
  365. if ((tensor_size < 0) || (nopadding_size < 0)) {
  366. REPORT_INNER_ERROR("E19999", "GetMemorySize fail, "
  367. "tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
  368. tensor_size, nopadding_size, op_desc->GetName().c_str());
  369. GELOGE(FAILED, "[Get][MemorySize]tensor_size:%ld or nopadding_size:%ld less than 0, invalid, op_name:%s",
  370. tensor_size, nopadding_size, op_desc->GetName().c_str());
  371. return FAILED;
  372. }
  373. return SUCCESS;
  374. }
  375. void AlignMemOffset(int64_t &mem_align_size) {
  376. if (mem_align_size <= 0) {
  377. return;
  378. }
  379. mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
  380. }
  381. bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  382. bool is_peer_output_continuous = false;
  383. // If GetBool fail, is_peer_output_continuous is false.
  384. (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  385. // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and
  386. // continuous output of the previous node is the same, we can support it. If size != 1, there may be
  387. // conflict between the two, we can not support it.
  388. auto peer_output_size = peer_op_desc->GetOutputsSize();
  389. GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
  390. std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
  391. " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
  392. " requires continuous output. There may be conflict between the two." +
  393. "This node is not supported now.";
  394. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  395. return true;);
  396. bool is_peer_reference = false;
  397. // If GetBool fail, is_peer_reference is false.
  398. (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  399. GE_IF_BOOL_EXEC(is_peer_reference,
  400. std::string warning = "[Check][Continuous]Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
  401. " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
  402. " is ref. There may be conflict between the two.";
  403. GELOGW("%s", warning.c_str());
  404. return false;);
  405. return false;
  406. }
  407. /// op1 -> node -> op2
  408. /// return true when node is ref from input, and op1 or op2 is reuse input from output
  409. bool GraphMemoryAssigner::IsRefFromInputOpCascade(const NodePtr &node) {
  410. std::unordered_set<int32_t> ref_input_index;
  411. int32_t reuse_in_index = -1;
  412. for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
  413. bool reuse_input = GraphUtils::IsRefFromInput(out_anchor, reuse_in_index);
  414. if (reuse_input) {
  415. GELOGD("IsRefFromInputOpCascade: cur node:%s:%d is ref", node->GetName().c_str(), reuse_in_index);
  416. ref_input_index.insert(reuse_in_index);
  417. }
  418. }
  419. bool ref_from_input = !ref_input_index.empty();
  420. if (!ref_from_input) {
  421. return false;
  422. }
  423. for (const auto &in_anchor : node->GetAllInDataAnchors()) {
  424. const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor();
  425. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  426. auto in_node = peer_out_anchor->GetOwnerNode();
  427. if (isVariableMemoryNode(in_node) && (ref_input_index.count(in_anchor->GetIdx()) > 0)) {
  428. GELOGD("Reuse variable memory, input node:%s, type:%s.", in_node->GetName().c_str(), in_node->GetType().c_str());
  429. return false;
  430. }
  431. if (ref_from_input && GraphUtils::IsRefFromInput(peer_out_anchor, reuse_in_index)) {
  432. GELOGD("IsRefFromInputOpCascade: in node[%s] is ref, reuse index is:%d",
  433. in_node->GetName().c_str(), reuse_in_index);
  434. return true;
  435. }
  436. }
  437. for (const auto &out_anchor : node->GetAllOutDataAnchors()) {
  438. const auto &peer_in_anchors = out_anchor->GetPeerInDataAnchors();
  439. for (const auto &peer_in_anchor : peer_in_anchors) {
  440. auto peer_in_node = peer_in_anchor->GetOwnerNode();
  441. GE_IF_BOOL_EXEC(peer_in_node == nullptr, continue);
  442. for (const auto &peer_in_node_out_anchor : peer_in_node->GetAllOutDataAnchors()) {
  443. if (ref_from_input && GraphUtils::IsRefFromInput(peer_in_node_out_anchor, reuse_in_index)) {
  444. GELOGD("IsRefFromInputOpCascade: out node[%s] is ref, reuse index is:%d",
  445. peer_in_node_out_anchor->GetOwnerNode()->GetName().c_str(), reuse_in_index);
  446. return true;
  447. }
  448. }
  449. }
  450. }
  451. return false;
  452. }
  453. /// node:in0(in0 reuse out0) -> peer_node:out0
  454. /// update peer_node's 0th output offset with node's 0th output offset
  455. Status GraphMemoryAssigner::UpdateRefOpOffsetReverse(const NodePtr &node) {
  456. map<int32_t, int32_t> out2ins;
  457. GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node:%s",
  458. node->GetName().c_str());
  459. auto op_desc = node->GetOpDesc();
  460. GE_CHECK_NOTNULL(op_desc);
  461. vector<int64_t> output_list = op_desc->GetOutputOffset();
  462. for (const auto &out2in : out2ins) {
  463. auto reuse_in_anchor = node->GetInDataAnchor(out2in.second);
  464. GE_CHECK_NOTNULL(reuse_in_anchor);
  465. auto peer_out_anchor = reuse_in_anchor->GetPeerOutAnchor();
  466. GE_CHECK_NOTNULL(peer_out_anchor);
  467. auto peer_node = peer_out_anchor->GetOwnerNode();
  468. GE_CHECK_NOTNULL(peer_node);
  469. if (isVariableMemoryNode(peer_node)) {
  470. GELOGW("Peer node to update is %s, skip it. Node name:%s.",
  471. peer_node->GetType().c_str(), peer_node->GetName().c_str());
  472. continue;
  473. }
  474. auto peer_op_desc = peer_node->GetOpDesc();
  475. GE_CHECK_NOTNULL(peer_op_desc);
  476. vector<int64_t> peer_output_list = peer_op_desc->GetOutputOffset();
  477. if ((peer_out_anchor->GetIdx() >= static_cast<int>(peer_output_list.size()))
  478. || (out2in.first >= static_cast<int32_t>(output_list.size()))) {
  479. GELOGW("out of range, peer_out_anchor:%d, peer_output_list size:%zu, out2in:%d, output_list size:%zu",
  480. peer_out_anchor->GetIdx(),
  481. peer_output_list.size(),
  482. out2in.first,
  483. output_list.size());
  484. continue;
  485. }
  486. peer_output_list.at(peer_out_anchor->GetIdx()) = output_list.at(out2in.first);
  487. peer_op_desc->SetOutputOffset(peer_output_list);
  488. GELOGD("UpdateRefOpOffsetReverse: Node[%s] output[%d] is set from node[%s] output index[%d] offset[%ld]",
  489. peer_node->GetName().c_str(),
  490. peer_out_anchor->GetIdx(),
  491. node->GetName().c_str(),
  492. out2in.first,
  493. output_list.at(out2in.first));
  494. }
  495. return SUCCESS;
  496. }
  497. Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  498. Status ret;
  499. // Stored nodes which need assign continuous input memory in `reverse topo order`
  500. std::vector<NodePtr> nodes_stack;
  501. std::map<NodePtr, uint32_t> node_2_continuous_type;
  502. // Traverse nodes
  503. for (auto &node : compute_graph_->GetAllNodes()) {
  504. GE_CHECK_NOTNULL(node);
  505. uint32_t continuous_type;
  506. auto iter = node_2_continuous_type.find(node);
  507. if (iter == node_2_continuous_type.end()) {
  508. continuous_type = GetContinuousMemoryType(node->GetOpDesc());
  509. node_2_continuous_type.emplace(node, continuous_type);
  510. } else {
  511. continuous_type = iter->second;
  512. }
  513. // Assign continuous input memory
  514. bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
  515. if (IsRefFromInputOpCascade(node)) {
  516. nodes_stack.push_back(node);
  517. GELOGD("Ref: Push node:%s to stack", node->GetName().c_str());
  518. } else if (continuous_input) {
  519. if (AssignContinuousInputMemoryWithAtomicProcessDirectly(node, node_2_continuous_type)) {
  520. GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, continuous_type),
  521. "[Assign][Memory:Continuous:Input]fail for node:%s", node->GetName().c_str())
  522. } else {
  523. nodes_stack.push_back(node);
  524. GELOGD("Continuous: Push node:%s to stack", node->GetName().c_str());
  525. }
  526. }
  527. // Assign continuous output memory
  528. int64_t memory_type = RT_MEMORY_HBM;
  529. bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  530. if (continuous_output) {
  531. GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"),
  532. "[Get][MemType]fail for node:%s", node->GetName().c_str());
  533. ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
  534. if (ret != ge::SUCCESS) {
  535. GELOGE(ret, "[Assign][Memory:Continuous:Ouput]fail for node:%s", node->GetName().c_str());
  536. return ret;
  537. }
  538. }
  539. }
  540. // Assign continuous input memory in `reverse topo order` which stored before
  541. while (!nodes_stack.empty()){
  542. auto node = nodes_stack.back();
  543. nodes_stack.pop_back();
  544. auto iter = node_2_continuous_type.find(node);
  545. if (iter == node_2_continuous_type.end()) {
  546. REPORT_INNER_ERROR("E19999", "Get ContinuousType from node_2_continuous_type map failed for node:%s",
  547. node->GetName().c_str());
  548. GELOGE(FAILED, "[Get][ContinuousType] find fail for node:%s", node->GetName().c_str());
  549. return FAILED;
  550. }
  551. if (((iter->second & kTypeInput) != 0) || ((iter->second & kTypeInputNoPadding) != 0)) {
  552. GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true),
  553. "[Assign][Memory:Continuous:Input]fail for node:%s.", node->GetName().c_str())
  554. } else {
  555. GE_CHK_STATUS_RET(UpdateRefOpOffsetReverse(node),
  556. "[Update][Memory:Reference:Output]fail for node:%s", node->GetName().c_str())
  557. }
  558. }
  559. for (auto pair : memory_offset_) {
  560. GELOGD("[Reassign][Memory:Continuous]At last, memory type = %ld, mem offset = %zu", pair.first,
  561. pair.second.mem_offset_);
  562. }
  563. return ge::SUCCESS;
  564. }
  565. Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
  566. int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) {
  567. GELOGI("[Assign][Memory:Input:Continuous]start for Current node %s", node->GetName().c_str());
  568. auto iter = memory_offset_.find(memory_type);
  569. if (iter == memory_offset_.end()) {
  570. REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
  571. "for node:%s, ", memory_type, node->GetName().c_str());
  572. GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
  573. memory_type, node->GetName().c_str());
  574. return FAILED;
  575. }
  576. // The head and tail of hcom continuous input should be added 512
  577. iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  578. continuous_mem_start = iter->second.mem_offset_;
  579. int64_t mem_offset = iter->second.mem_offset_;
  580. int64_t extra_memory_size = 0;
  581. bool is_continuous_input_allocated = false;
  582. auto op_desc = node->GetOpDesc();
  583. GE_CHECK_NOTNULL(op_desc);
  584. vector<int64_t> output_list_this = op_desc->GetOutputOffset();
  585. if (output_list_this.empty()) {
  586. REPORT_INNER_ERROR("E19999", "No output offset in node :%s, not expected",
  587. node->GetName().c_str());
  588. GELOGE(FAILED, "[Get][OutputOffset] empty is invalid, node:%s", node->GetName().c_str());
  589. return FAILED;
  590. }
  591. (void) ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  592. for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
  593. GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
  594. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  595. GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
  596. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  597. GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
  598. GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
  599. int64_t tensor_desc_size = 0;
  600. int64_t nopadding_size = 0;
  601. int64_t real_size = 0;
  602. std::vector<int64_t> offsets_of_fusion = {};
  603. bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
  604. lx_fusion = lx_fusion && !offsets_of_fusion.empty();
  605. if (lx_fusion) {
  606. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
  607. std::string error = "fusion: peer node:" + FmtToStr(peer_op_desc->GetName()) +
  608. " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
  609. " is out of range:" + FmtToStr(offsets_of_fusion.size());
  610. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  611. return FAILED;
  612. }
  613. nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
  614. tensor_desc_size = nopadding_size;
  615. } else {
  616. if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
  617. continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
  618. return FAILED;
  619. }
  620. }
  621. bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
  622. vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
  623. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
  624. std::string error = "peer node:" + FmtToStr(peer_op_desc->GetName()) +
  625. " anchor_index:" + FmtToStr(peer_out_data_anchor->GetIdx()) +
  626. " is out of range:" + FmtToStr(output_list.size());
  627. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  628. return FAILED;
  629. }
  630. // when continuous input has been allocated first input is beginning offset
  631. bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
  632. if (is_allocated_first_input) {
  633. std::map<int32_t, int32_t> out2ins;
  634. GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s",
  635. node->GetName().c_str());
  636. // output is beginning offset, set offset for input; only support this case now
  637. if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) {
  638. auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx());
  639. output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first);
  640. peer_op_desc->SetOutputOffset(output_list);
  641. GELOGI("[Update][Offset]Node %s out %d ref in %d input node %s, use output offset %ld update %ld",
  642. node->GetName().c_str(), out2ins.begin()->first, out2ins.begin()->second,
  643. peer_op_desc->GetName().c_str(), output_list_this.at(out2ins.begin()->first), peer_output_offset);
  644. } else {
  645. GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu.", node->GetName().c_str(),
  646. out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size());
  647. }
  648. // first input is beginning offset
  649. mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
  650. continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
  651. } else {
  652. // set offset for input
  653. output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
  654. peer_op_desc->SetOutputOffset(output_list);
  655. }
  656. int64_t align_size = tensor_desc_size;
  657. if (is_nopadding) {
  658. mem_offset += nopadding_size;
  659. extra_memory_size += (tensor_desc_size - nopadding_size);
  660. real_size = nopadding_size;
  661. } else {
  662. ge::AlignMemOffset(align_size);
  663. mem_offset += align_size;
  664. // The head and tail of hcom continuous input should be added 512
  665. extra_memory_size = MEM_ALIGN_SIZE;
  666. real_size = tensor_desc_size;
  667. }
  668. GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
  669. "size[%zu] realsize[%ld] nopadding size[%d]", node->GetOwnerComputeGraph()->GetName().c_str(),
  670. peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(),
  671. output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
  672. is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  673. }
  674. mem_offset += extra_memory_size;
  675. ge::AlignMemOffset(mem_offset);
  676. continuous_mem_size = mem_offset - continuous_mem_start;
  677. if (is_continuous_input_allocated) {
  678. // not allocate memory here, so no need add 512 in header
  679. iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  680. } else {
  681. iter->second.mem_offset_ = mem_offset;
  682. }
  683. return SUCCESS;
  684. }
  685. Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  686. auto in_data_anchor_list = node->GetAllInDataAnchors();
  687. if (in_data_anchor_list.empty()) {
  688. REPORT_INNER_ERROR("E19999", "InAnchor list empty in node:%s, not expect",
  689. node->GetName().c_str());
  690. GELOGE(FAILED, "[Get][InAnchor]empty is invalid, node:%s", node->GetName().c_str());
  691. return FAILED;
  692. }
  693. auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  694. GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr,
  695. REPORT_INNER_ERROR("E19999", "PeerAcnhor is null, not expect for node:%s",
  696. node->GetName().c_str());
  697. GELOGE(ge::FAILED, "[Check][PeerAnchor]null is invalid, node:%s", node->GetName().c_str());
  698. return ge::FAILED);
  699. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  700. GE_IF_BOOL_EXEC(peer_op_desc == nullptr,
  701. REPORT_INNER_ERROR("E19999", "PeerOpDesc is null, not expect for node:%s",
  702. node->GetName().c_str());
  703. GELOGE(ge::FAILED, "[Check][PeerOpDesc]null is invalid, node:%s", node->GetName().c_str());
  704. return ge::FAILED);
  705. vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  706. if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
  707. REPORT_INNER_ERROR("E19999", "PeerAnchorIndex:%d bigger than in_offset size:%lu, judge invalid for node:%s",
  708. peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
  709. GELOGE(FAILED, "[Check][Index:PeerOutDataAnchor]PeerIndex:%d bigger than in_offset size:%lu, node:%s",
  710. peer_out_data_anchor->GetIdx(), in_node_output_offsets.size(), node->GetName().c_str());
  711. return FAILED;
  712. }
  713. mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  714. return SUCCESS;
  715. }
  716. Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
  717. uint32_t continuous_type) {
  718. GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  719. auto out_op_desc = node->GetOpDesc();
  720. GE_IF_BOOL_EXEC(out_op_desc == nullptr,
  721. REPORT_INNER_ERROR("E19999", "OpDesc is null, not expect for node:%s",
  722. node->GetName().c_str());
  723. GELOGE(ge::FAILED, "[Check][OpDesc]null is invalid, node:%s", node->GetName().c_str()));
  724. vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  725. if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
  726. REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, invalid in node:%s",
  727. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  728. GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
  729. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  730. return ge::FAILED;
  731. }
  732. int64_t mem_offset = 0;
  733. bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  734. if (is_nopadding) {
  735. // out tensor memory must be reused input tensor memory
  736. if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
  737. return ge::FAILED;
  738. }
  739. } else {
  740. // Get the reference type of the node, default is false
  741. bool is_ref = false;
  742. // If GetBool fail, is_ref is false.
  743. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  744. // If the output is ref type and refers to the ref of an input, the name of the output
  745. // and the input are the same. Ge encounters ref type, finds matching relationship according
  746. // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast
  747. if (is_ref) {
  748. GELOGI("Current node %s no needs assign continuous output because reference input by name.",
  749. node->GetName().c_str());
  750. return SUCCESS;
  751. }
  752. mem_offset = output_list[0];
  753. }
  754. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  755. output_list[out_data_anchor->GetIdx()] = mem_offset;
  756. int64_t tensor_desc_size = 0;
  757. int64_t nopadding_size = 0;
  758. if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
  759. tensor_desc_size, nopadding_size) != ge::SUCCESS) {
  760. return FAILED;
  761. }
  762. if (is_nopadding) {
  763. mem_offset += nopadding_size;
  764. } else {
  765. mem_offset += tensor_desc_size;
  766. ge::AlignMemOffset(mem_offset);
  767. }
  768. GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
  769. " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
  770. out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(),
  771. output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
  772. is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  773. }
  774. out_op_desc->SetOutputOffset(output_list);
  775. return ge::SUCCESS;
  776. }
  777. Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  778. // key:dynamic batch, batch name
  779. map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  780. map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  781. Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  782. if (status != SUCCESS) {
  783. GELOGE(status, "[Filter][AtomicNode]failed in graph_id:%u, graph_name:%s",
  784. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  785. return status;
  786. }
  787. auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  788. if (mem_iter == memory_offset_.end()) {
  789. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  790. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  791. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  792. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  793. return FAILED;
  794. }
  795. int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  796. int64_t batch_max_mem_offset = batch_atomic_mem_start;
  797. for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
  798. mem_iter->second.mem_offset_ = batch_atomic_mem_start;
  799. for (auto &iter : iter_batch.second) {
  800. int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  801. GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
  802. for (auto &atomic_node : iter.second) {
  803. vector<int64_t> mem_offset_end;
  804. status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
  805. if (status != SUCCESS) {
  806. GELOGE(status, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
  807. atomic_node->GetName().c_str());
  808. return status;
  809. }
  810. }
  811. int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
  812. if (atomic_mem_size != 0) {
  813. GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
  814. "[Set][Attr]fail for atomic addr clean node %s.", iter.first->GetName().c_str());
  815. }
  816. }
  817. batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  818. }
  819. mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  820. batch_atomic_mem_start = batch_max_mem_offset;
  821. for (auto &iter_batch : connecting_output_atomic_nodes) {
  822. mem_iter->second.mem_offset_ = batch_atomic_mem_start;
  823. if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
  824. GELOGE(FAILED, "[Assign][Memory]for nodes that connect to netoutput failed."
  825. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  826. return FAILED;
  827. }
  828. batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  829. }
  830. mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  831. return SUCCESS;
  832. }
  833. Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
  834. map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
  835. map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  836. GE_CHECK_NOTNULL(compute_graph_);
  837. for (const auto &node : compute_graph_->GetAllNodes()) {
  838. if (node->GetType() == ATOMICADDRCLEAN) {
  839. map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
  840. const auto &out_control_anchor = node->GetOutControlAnchor();
  841. GE_CHECK_NOTNULL(out_control_anchor);
  842. for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
  843. if (peer_in_control_anchor != nullptr) {
  844. auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
  845. auto peer_in_node_desc = peer_in_node->GetOpDesc();
  846. if (peer_in_node_desc != nullptr) {
  847. bool is_atomic_node = false;
  848. // If GetBool fail, is_atomic_node is false.
  849. (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
  850. if (is_atomic_node) {
  851. bool is_reference = false;
  852. // If GetBool fail, is_reference is false.
  853. (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
  854. if (is_reference) {
  855. REPORT_INNER_ERROR("E19999", "Op:%s cannot have both atomic and is_reference attribute, "
  856. "not support now", peer_in_node_desc->GetName().c_str());
  857. GELOGE(FAILED, "[Check][Attr]Op:%s cannot have both atomic and is_reference attribute, "
  858. "not support now", peer_in_node_desc->GetName().c_str());
  859. return ge::PARAM_INVALID;
  860. }
  861. std::string batch_label;
  862. (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
  863. vector<int> is_connecting_output;
  864. // If GetBool fail, attr is_connecting_output is an empty vector.
  865. (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
  866. if (is_connecting_output.empty()) {
  867. tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
  868. continue;
  869. }
  870. connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
  871. tmp_normal_atomic_nodes[batch_label].clear();
  872. break;
  873. }
  874. }
  875. }
  876. }
  877. for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
  878. if (!it_atomic_node.second.empty()) {
  879. normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
  880. }
  881. }
  882. }
  883. }
  884. return SUCCESS;
  885. }
  886. Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
  887. vector<int64_t> &mem_offset_end) {
  888. auto node_op_desc = node->GetOpDesc();
  889. // Assign atomic node output memory
  890. Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  891. if (ret != SUCCESS) {
  892. GELOGE(ret, "[Assign][Memory:Ouput:Atomic]Failed for node:%s.", node_op_desc->GetName().c_str());
  893. return ret;
  894. }
  895. // Check and assign atomic node workspace memory
  896. map<string, map<int64_t, int64_t>> atomic_workspace_info;
  897. atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  898. if (!atomic_workspace_info.empty()) {
  899. bool is_fusion_node = false;
  900. // If GetBool fail, is_fusion_node is false.
  901. (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
  902. if (is_fusion_node) {
  903. // Assign fusion atomic node workspace memory
  904. ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  905. } else {
  906. // Assign single ordinary atomic node workspace memory, not include fusion node
  907. ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
  908. }
  909. if (ret != SUCCESS) {
  910. GELOGE(ret, "[Assign][Memory:Atomic:Workspace]fail for node:%s.", node_op_desc->GetName().c_str());
  911. return ret;
  912. }
  913. } else {
  914. GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  915. }
  916. return SUCCESS;
  917. }
  918. Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  919. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  920. if (iter == memory_offset_.end()) {
  921. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  922. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  923. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  924. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  925. return FAILED;
  926. }
  927. for (auto &node : connect_netoutput_nodes) {
  928. GE_CHECK_NOTNULL(node);
  929. if (node->GetOpDesc() == nullptr) {
  930. GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
  931. continue;
  932. }
  933. // Atomic memory start addr
  934. int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
  935. GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
  936. node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
  937. vector<int64_t> mem_offset_end;
  938. if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
  939. GELOGE(FAILED, "[Assign][Memory]output atomic mem and workspace mem, fail for node name is %s.",
  940. node->GetName().c_str());
  941. return FAILED;
  942. }
  943. // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
  944. if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
  945. GELOGE(FAILED, "[Set][Attr:IndependentAtomic]fail for node:%s", node->GetName().c_str());
  946. return FAILED;
  947. }
  948. }
  949. return SUCCESS;
  950. }
  951. Status GraphMemoryAssigner::AssignReferenceMemory() {
  952. for (auto &node : compute_graph_->GetDirectNode()) {
  953. // Get the reference type of the node, default is false
  954. bool is_ref = false;
  955. // If GetBool fail, is_ref is false.
  956. (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
  957. if (!is_ref) {
  958. continue;
  959. }
  960. GELOGI("Current node %s needs to support the reference relationship between output and input.",
  961. node->GetName().c_str());
  962. auto out_op_desc = node->GetOpDesc();
  963. GE_IF_BOOL_EXEC(out_op_desc == nullptr,
  964. REPORT_INNER_ERROR("E19999", "out_op_desc is null.");
  965. GELOGE(ge::FAILED, "[Check][Param] out_op_desc is null."); return ge::FAILED);
  966. vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  967. if (out_op_desc->GetOutputsSize() > output_list.size()) {
  968. REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s",
  969. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  970. GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
  971. out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
  972. return ge::FAILED;
  973. }
  974. map<string, int> input_name_index;
  975. for (const auto &input_name : out_op_desc->GetAllInputNames()) {
  976. int index = out_op_desc->GetInputIndexByName(input_name);
  977. input_name_index.emplace(input_name, index);
  978. }
  979. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  980. string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
  981. auto iter = input_name_index.find(out_data_anchor_name);
  982. if (iter != input_name_index.end()) {
  983. int index = iter->second;
  984. GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
  985. iter->first.c_str(), out_data_anchor_name.c_str());
  986. GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
  987. auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
  988. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  989. int peer_out_anchor_index = peer_out_anchor->GetIdx();
  990. auto peer_out_node = peer_out_anchor->GetOwnerNode();
  991. auto peer_out_op_desc = peer_out_node->GetOpDesc();
  992. GE_CHECK_NOTNULL(peer_out_op_desc);
  993. output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
  994. GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
  995. node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
  996. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
  997. } else {
  998. GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
  999. node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
  1000. out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
  1001. }
  1002. }
  1003. out_op_desc->SetOutputOffset(output_list);
  1004. }
  1005. return ge::SUCCESS;
  1006. }
  1007. bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  1008. for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
  1009. auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
  1010. if (peer_out_data_anchor == nullptr) {
  1011. continue;
  1012. }
  1013. auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  1014. if (peer_op_desc == nullptr) {
  1015. continue;
  1016. }
  1017. if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
  1018. (peer_op_desc->GetType() == VARIABLE)) {
  1019. REPORT_INNER_ERROR("E19999", "node(type:%s, name:%s) link to atomic node(name:%s), "
  1020. "this situation not supported now",
  1021. peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
  1022. GELOGE(ge::FAILED, "[Check][Link]node(type:%s, name:%s) link to atomic node(name:%s), "
  1023. "this situation not supported now",
  1024. peer_op_desc->GetType().c_str(), peer_op_desc->GetName().c_str(), node->GetName().c_str());
  1025. return false;
  1026. }
  1027. }
  1028. return true;
  1029. }
  1030. Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  1031. auto op_desc = node->GetOpDesc();
  1032. GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  1033. mem_offset_end.clear();
  1034. GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  1035. vector<int64_t> atomic_output_index;
  1036. // If GetListInt fail, atomic_output_index is empty.
  1037. (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  1038. // Check atomic output
  1039. vector<int64_t> output_list = op_desc->GetOutputOffset();
  1040. if (atomic_output_index.size() > output_list.size()) {
  1041. std::string error =
  1042. "Op:" + FmtToStr(node->GetName()) + "'s size:" + FmtToStr(atomic_output_index.size()) +
  1043. " of atomic_output_index is more than the size:" + FmtToStr(output_list.size()) + " of output_list";
  1044. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1045. return ge::FAILED;
  1046. }
  1047. auto output_list_size = static_cast<int64_t>(output_list.size());
  1048. auto iter = memory_offset_.find(RT_MEMORY_HBM);
  1049. if (iter == memory_offset_.end()) {
  1050. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  1051. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1052. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  1053. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1054. return FAILED;
  1055. }
  1056. for (auto &output_index : atomic_output_index) {
  1057. if (output_index >= output_list_size) {
  1058. std::string error =
  1059. "Op:" + FmtToStr(node->GetName()) + "'s atomic_output index:" + FmtToStr(output_index) +
  1060. " is more than the size:" + FmtToStr(output_list_size) + " of output_list.";
  1061. GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
  1062. return ge::PARAM_INVALID;
  1063. }
  1064. // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
  1065. bool is_assigned_mem = false;
  1066. if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
  1067. GELOGE(ge::FAILED, "[Get][MemoryAssignmentStatus]fail for node %s, out_index:%ld",
  1068. node->GetName().c_str(), output_index);
  1069. return ge::FAILED;
  1070. }
  1071. // If you have already assigned an atomic address, skip it, and you don't need to reassign it.
  1072. if (is_assigned_mem) {
  1073. GELOGI(
  1074. "Node %s atomic output : we have assigned atomic memory as the input of next node in "
  1075. "ReAssignContinuousMemory function.",
  1076. op_desc->GetName().c_str());
  1077. continue;
  1078. }
  1079. auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
  1080. int64_t size = 0;
  1081. if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
  1082. GELOGI("Get size failed");
  1083. }
  1084. output_list[output_index] = iter->second.mem_offset_;
  1085. std::string batch_label;
  1086. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
  1087. GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%u] "
  1088. "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
  1089. node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
  1090. size, size, batch_label.c_str());
  1091. iter->second.mem_offset_ += size;
  1092. AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
  1093. mem_offset_end.emplace_back(iter->second.mem_offset_);
  1094. }
  1095. op_desc->SetOutputOffset(output_list);
  1096. return ge::SUCCESS;
  1097. }
  1098. Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
  1099. bool &is_mem_assigned) {
  1100. if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
  1101. std::string error =
  1102. "Op:" + FmtToStr(node->GetName()) + "'s output index:" + FmtToStr(output_index) +
  1103. " is more than the size:" + FmtToStr(node->GetAllOutDataAnchors().size()) + " of node's AllOutDataAnchors.";
  1104. GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
  1105. return ge::PARAM_INVALID;
  1106. }
  1107. auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  1108. GE_CHECK_NOTNULL(out_data_anchor);
  1109. auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  1110. for (auto &input_anchor : input_anchors) {
  1111. auto output_node = input_anchor->GetOwnerNode();
  1112. /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
  1113. /// has been assigned
  1114. vector<int64_t> atomic_input_index;
  1115. (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
  1116. if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
  1117. is_mem_assigned = true;
  1118. break;
  1119. }
  1120. }
  1121. return SUCCESS;
  1122. }
  1123. Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
  1124. map<string, map<int64_t, int64_t>> &workspace_info,
  1125. vector<int64_t> &mem_offset_end) {
  1126. GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  1127. auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  1128. if (mem_type_iter == memory_offset_.end()) {
  1129. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  1130. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1131. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  1132. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1133. return FAILED;
  1134. }
  1135. vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  1136. for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
  1137. if (op_desc->GetName() != iter->first) {
  1138. std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
  1139. " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
  1140. GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
  1141. return ge::PARAM_INVALID;
  1142. }
  1143. if (iter->second.empty()) {
  1144. continue;
  1145. }
  1146. for (auto &info_iter : iter->second) {
  1147. auto workspace_index = static_cast<uint64_t>(info_iter.first);
  1148. auto workspace_size = info_iter.second;
  1149. if (workspace_index >= workspace_vector.size()) {
  1150. std::string error = "The workspace index:" + FmtToStr(workspace_index) +
  1151. " is more than the size:" + FmtToStr(workspace_vector.size()) + " of workspace vector in op:" +
  1152. op_desc->GetName().c_str();
  1153. GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
  1154. return ge::PARAM_INVALID;
  1155. }
  1156. workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
  1157. std::string batch_label;
  1158. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
  1159. GELOGI(
  1160. "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
  1161. "memtype[%u] size[%ld] real_size[%ld] batch[%s].",
  1162. compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
  1163. mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
  1164. batch_label.c_str());
  1165. mem_type_iter->second.mem_offset_ += workspace_size;
  1166. AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
  1167. mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
  1168. }
  1169. }
  1170. op_desc->SetWorkspace(workspace_vector);
  1171. return SUCCESS;
  1172. }
  1173. Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
  1174. map<string, map<int64_t, int64_t>> &workspace_info,
  1175. vector<int64_t> &mem_offset_end) {
  1176. GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  1177. auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  1178. if (mem_type_iter == memory_offset_.end()) {
  1179. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ does not have type[HBM], not expected, "
  1180. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1181. GELOGE(FAILED, "[Check][InnerData]memory_offset_ does not have memory type[HBM]"
  1182. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1183. return FAILED;
  1184. }
  1185. map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  1186. for (auto &iter : workspace_info) {
  1187. if (iter.second.empty()) {
  1188. continue;
  1189. }
  1190. map<int64_t, int64_t> index_offset;
  1191. for (auto &info_iter : iter.second) {
  1192. auto workspace_index = static_cast<uint64_t>(info_iter.first);
  1193. auto workspace_size = info_iter.second;
  1194. size_t workspace_offset = mem_type_iter->second.mem_offset_;
  1195. std::string batch_label;
  1196. (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
  1197. GELOGI(
  1198. "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
  1199. "memtype[%u] ssize[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
  1200. op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
  1201. op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());
  1202. mem_type_iter->second.mem_offset_ += workspace_size;
  1203. AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
  1204. mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
  1205. index_offset.insert(std::make_pair(workspace_index, workspace_offset));
  1206. }
  1207. sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  1208. }
  1209. if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
  1210. REPORT_INNER_ERROR("E19999", "Set Attr:%s fail for node:%s",
  1211. EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
  1212. GELOGE(FAILED, "[Set][Attr:%s]fail for node:%s.",
  1213. EXT_ATTR_ATOMIC_WORKSPACE_OFFSET.c_str(), op_desc->GetName().c_str());
  1214. return FAILED;
  1215. }
  1216. return SUCCESS;
  1217. }
  1218. Status GraphMemoryAssigner::CheckOffset() {
  1219. std::map<std::string, std::string> anchor_to_symbol;
  1220. std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  1221. if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
  1222. REPORT_CALL_ERROR("E19999", "Get ref-mapping for graph %s failed", compute_graph_->GetName().c_str());
  1223. GELOGE(FAILED, "[Get][RefMapping]fail for graph %s", compute_graph_->GetName().c_str());
  1224. return FAILED;
  1225. }
  1226. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1227. GE_CHECK_NOTNULL(node->GetOpDesc());
  1228. vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
  1229. for (auto input : input_list) {
  1230. if (input == ge::kInvalidOffset) {
  1231. std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
  1232. + " in node" + FmtToStr(node->GetName());
  1233. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1234. return FAILED;
  1235. }
  1236. }
  1237. bool need_update_output = false;
  1238. vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
  1239. for (uint32_t i = 0; i < output_list.size(); ++i) {
  1240. if (output_list[i] == ge::kInvalidOffset) {
  1241. std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
  1242. + " in node" + FmtToStr(node->GetName());
  1243. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1244. return FAILED;
  1245. }
  1246. if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
  1247. auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
  1248. if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
  1249. output_list[i] = symbol_offset;
  1250. need_update_output = true;
  1251. }
  1252. }
  1253. }
  1254. if (need_update_output) {
  1255. node->GetOpDesc()->SetOutputOffset(output_list);
  1256. }
  1257. vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
  1258. for (auto workspace : workspace_list) {
  1259. if (workspace == ge::kInvalidOffset) {
  1260. std::string error = "Invalid workspace" + FmtToStr(ge::kInvalidOffset) +
  1261. + " in node" + FmtToStr(node->GetName());
  1262. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1263. return FAILED;
  1264. }
  1265. }
  1266. // check reuse input and output
  1267. GE_CHK_STATUS_RET(CheckRefNodeOffset(node), "[Check][Offset]fail for node: %s", node->GetName().c_str());
  1268. }
  1269. return SUCCESS;
  1270. }
  1271. ge::Status GraphMemoryAssigner::CheckRefNodeOffset(const NodePtr &node) {
  1272. GE_CHECK_NOTNULL(node);
  1273. std::map<int32_t, int32_t> out2ins;
  1274. GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  1275. auto opdesc = node->GetOpDesc();
  1276. GE_CHECK_NOTNULL(opdesc);
  1277. auto output_list = opdesc->GetOutputOffset();
  1278. auto input_list = opdesc->GetInputOffset();
  1279. for (const auto &out2in : out2ins) {
  1280. auto out_i = out2in.first;
  1281. if (static_cast<size_t>(out_i) >= output_list.size()) {
  1282. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
  1283. FmtToStr(output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i);
  1284. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1285. return ge::FAILED;
  1286. }
  1287. auto in_i = out2in.second;
  1288. if (static_cast<size_t>(in_i) >= input_list.size()) {
  1289. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset size" +
  1290. FmtToStr(input_list.size()) + "should bigger than ref input index" + FmtToStr(in_i);
  1291. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1292. return ge::FAILED;
  1293. }
  1294. if (output_list[out_i] != input_list[in_i]) {
  1295. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "input offset " + FmtToStr(input_list[in_i]) +
  1296. "should equal to output offset" + FmtToStr(output_list[out_i]) + "with ref in" +
  1297. FmtToStr(in_i) + "to output" + FmtToStr(out_i);
  1298. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1299. return ge::FAILED;
  1300. }
  1301. }
  1302. return ge::SUCCESS;
  1303. }
  1304. ge::Status GraphMemoryAssigner::SetInputOffset() {
  1305. if (memory_offset_.empty()) {
  1306. REPORT_INNER_ERROR("E19999", "InnerData memory_offset_ empty, not expected, graph_id:%u, graph_name:%s",
  1307. compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1308. GELOGE(FAILED, "[Check][InnerData:memory_offset_]empty is not expected, "
  1309. "graph_id:%u, graph_name:%s", compute_graph_->GetGraphID(), compute_graph_->GetName().c_str());
  1310. }
  1311. for (auto pair : memory_offset_) {
  1312. if ((pair.first != RT_MEMORY_HBM) && (pair.second.mem_offset_ == 0)) {
  1313. continue;
  1314. }
  1315. GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
  1316. pair.second.mem_offset_, pair.first);
  1317. }
  1318. for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
  1319. if (UpdateOpInputOffset(node) != ge::SUCCESS) {
  1320. GELOGE(ge::FAILED, "[Update][Offset:Input]fail for op:%s", node->GetName().c_str());
  1321. return ge::FAILED;
  1322. }
  1323. }
  1324. return ge::SUCCESS;
  1325. }
  1326. NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  1327. if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
  1328. return node;
  1329. }
  1330. if (NodeUtils::IsDynamicShape(node)) {
  1331. return node;
  1332. }
  1333. return NodeUtils::GetParentInput(node);
  1334. }
  1335. ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1336. uint32_t parent_index = 0;
  1337. if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
  1338. return SUCCESS;
  1339. }
  1340. // Subgraph Data Node, check for constant input.
  1341. std::string op_type;
  1342. const auto &in_node = NodeUtils::GetParentInput(node);
  1343. if (NodeUtils::GetConstOpType(in_node, op_type)) {
  1344. input_list = in_node->GetOpDesc()->GetOutputOffset();
  1345. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as const output.
  1346. return SUCCESS; // Constant input.
  1347. }
  1348. // Memory allocated for dynamic shape subgraph Data.
  1349. if (NodeUtils::IsDynamicShape(node)) {
  1350. return SUCCESS;
  1351. }
  1352. const auto &owner = node->GetOwnerComputeGraph();
  1353. const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  1354. const auto parent_inputs = parent_desc->GetInputOffset();
  1355. if (parent_inputs.size() <= parent_index) {
  1356. std::string error = "Get Parent input offset failed, node is " + FmtToStr(node->GetName()) +
  1357. + ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
  1358. FmtToStr(parent_index);
  1359. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1360. return FAILED;
  1361. }
  1362. input_list = {parent_inputs[parent_index]};
  1363. node->GetOpDesc()->SetOutputOffset(input_list); // Set Data output same as parent input.
  1364. return SUCCESS;
  1365. }
  1366. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  1367. vector<int64_t> origin_input_list;
  1368. vector<int64_t> memory_type;
  1369. auto tmp_op_desc = node->GetOpDesc();
  1370. origin_input_list = tmp_op_desc->GetInputOffset();
  1371. int64_t valid_input_index = 0;
  1372. bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  1373. std::map<int32_t, int32_t> out2ins;
  1374. GE_CHK_STATUS_RET(TryGetNodeRefIndexes(node, out2ins), "[Get][RefIndexes]fail for node: %s", node->GetName().c_str());
  1375. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1376. vector<int64_t> output_list;
  1377. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1378. if (peer_out_anchor == nullptr) {
  1379. continue;
  1380. }
  1381. // If the current node not broadcast, the OutputOffset of the previous node is used to update the input_list
  1382. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1383. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1384. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1385. output_list = last_peer_out_op_desc->GetOutputOffset();
  1386. auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
  1387. if (output_list.size() > static_cast<size_t>(out_index)) {
  1388. int64_t peer_out_inner_offset = 0;
  1389. if (ge::AttrUtils::GetInt(last_peer_out_op_desc->MutableOutputDesc(out_index), ATTR_NAME_INNER_OFFSET,
  1390. peer_out_inner_offset)) {
  1391. (void)ge::AttrUtils::SetInt(tmp_op_desc->MutableInputDesc(anchor->GetIdx()), ATTR_NAME_INNER_OFFSET,
  1392. peer_out_inner_offset);
  1393. }
  1394. bool is_l1_type = false;
  1395. int64_t input_offset = output_list.at(out_index);
  1396. if (has_mem_type_attr && !origin_input_list.empty()) {
  1397. auto input_size = tmp_op_desc->GetInputsSize();
  1398. auto ori_input_offset_list_size = origin_input_list.size();
  1399. auto mem_type_size = memory_type.size();
  1400. if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
  1401. std::string error = "Node" + FmtToStr(tmp_op_desc->GetName()) +
  1402. + " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
  1403. FmtToStr(mem_type_size) + " from ori_input_offset_list_size" +
  1404. FmtToStr(ori_input_offset_list_size);
  1405. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1406. return ge::FAILED;
  1407. }
  1408. int64_t inner_offset = 0;
  1409. (void)ge::AttrUtils::GetInt(tmp_op_desc->MutableInputDesc(anchor->GetIdx()), ATTR_NAME_INNER_OFFSET,
  1410. inner_offset);
  1411. GELOGD("Node[%s] input[%d] has origin offset[%ld] origin_inner_offset[%ld]", tmp_op_desc->GetName().c_str(),
  1412. anchor->GetIdx(), origin_input_list[valid_input_index], inner_offset);
  1413. // L1 keep original input_offset
  1414. is_l1_type = (memory_type[valid_input_index] == RT_MEMORY_L1);
  1415. if (is_l1_type) {
  1416. input_offset = origin_input_list[valid_input_index];
  1417. } else {
  1418. // hbm input_offset = original input_offset + output_offset
  1419. if ((origin_input_list[valid_input_index] != 0) && (!tmp_op_desc->GetSubgraphInstanceNames().empty())) {
  1420. std::string error = "Node" + FmtToStr(tmp_op_desc->GetName()) +
  1421. +" has subgraphs which is conflict with has origin_input_list" +
  1422. FmtToStr(origin_input_list[valid_input_index]);
  1423. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1424. return ge::FAILED;
  1425. }
  1426. input_offset = origin_input_list[valid_input_index] + output_list.at(out_index);
  1427. (void)ge::AttrUtils::SetInt(tmp_op_desc->MutableInputDesc(anchor->GetIdx()), ATTR_NAME_INNER_OFFSET,
  1428. origin_input_list[valid_input_index] + inner_offset);
  1429. }
  1430. }
  1431. const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
  1432. if (in_node->GetType() == CONSTANT) {
  1433. GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
  1434. GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
  1435. }
  1436. if (!is_l1_type) {
  1437. // update ref output_offset when input change
  1438. GE_CHK_STATUS_RET(UpdateRefOpOutputOffset(node, out2ins, anchor->GetIdx(), input_offset),
  1439. "[Update][RefOffset]fail for node: %s", node->GetName().c_str());
  1440. }
  1441. GELOGD("Node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", tmp_op_desc->GetName().c_str(),
  1442. anchor->GetIdx(), peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), out_index,
  1443. input_offset);
  1444. input_list.emplace_back(input_offset);
  1445. valid_input_index++;
  1446. }
  1447. }
  1448. return ge::SUCCESS;
  1449. }
  1450. ge::Status GraphMemoryAssigner::UpdateRefOpOutputOffset(const NodePtr &node, const std::map<int32_t, int32_t> &out2ins,
  1451. const int ref_in, const int64_t input_offset) const {
  1452. auto opdesc = node->GetOpDesc();
  1453. GE_CHECK_NOTNULL(opdesc);
  1454. int64_t inner_offset = 0;
  1455. bool has_inner_offset = ge::AttrUtils::GetInt(opdesc->MutableInputDesc(ref_in), ATTR_NAME_INNER_OFFSET, inner_offset);
  1456. for (const auto &out2in : out2ins) {
  1457. auto out_i = out2in.first;
  1458. auto in_i = out2in.second;
  1459. if (in_i == ref_in) {
  1460. auto origin_output_list = opdesc->GetOutputOffset();
  1461. if (static_cast<size_t>(out_i) >= origin_output_list.size()) {
  1462. std::string error = "Node" + FmtToStr(opdesc->GetName()) + "output offset size" +
  1463. FmtToStr(origin_output_list.size()) + "should bigger than ref out index" + FmtToStr(out_i);
  1464. GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
  1465. return ge::FAILED;
  1466. }
  1467. origin_output_list[out_i] = input_offset;
  1468. opdesc->SetOutputOffset(origin_output_list);
  1469. if (has_inner_offset) {
  1470. (void)ge::AttrUtils::SetInt(opdesc->MutableOutputDesc(out_i), ATTR_NAME_INNER_OFFSET, inner_offset);
  1471. }
  1472. GELOGI("Node[%s] output[%d] is updated from reuse input index[%d] to offset[%ld], inner_offset[%ld]",
  1473. opdesc->GetName().c_str(), out_i, ref_in, input_offset, inner_offset);
  1474. }
  1475. }
  1476. return ge::SUCCESS;
  1477. }
  1478. ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  1479. GE_CHECK_NOTNULL(node->GetOpDesc());
  1480. vector<int64_t> input_list;
  1481. if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
  1482. for (const auto &anchor : node->GetAllInDataAnchors()) {
  1483. vector<int64_t> output_list;
  1484. auto peer_out_anchor = anchor->GetPeerOutAnchor();
  1485. if (peer_out_anchor == nullptr) {
  1486. continue;
  1487. }
  1488. auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
  1489. // If the current node is broadcast and the preceding node is variable, because InputOffset has been set
  1490. // in function:AssignVarAttr2Nodes, then the InputOffset of the broadcast node is taken to update the input_list.
  1491. // Otherwise, the OutputOffset of the previous node is used to update the input_list.
  1492. if (last_peer_out_node->GetType() != VARIABLE) {
  1493. auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
  1494. GE_CHECK_NOTNULL(last_peer_out_op_desc);
  1495. output_list = last_peer_out_op_desc->GetOutputOffset();
  1496. if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
  1497. input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
  1498. }
  1499. } else {
  1500. vector<int64_t> cur_node_input_list;
  1501. auto cur_node_op_desc = node->GetOpDesc();
  1502. GE_CHECK_NOTNULL(cur_node_op_desc);
  1503. cur_node_input_list = cur_node_op_desc->GetInputOffset();
  1504. if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
  1505. input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
  1506. }
  1507. }
  1508. }
  1509. } else if (node->GetType() == DATA_TYPE) {
  1510. if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
  1511. GELOGE(FAILED, "[Update][Offset:Input:Const]fail for node:%s ", node->GetName().c_str());
  1512. return FAILED;
  1513. }
  1514. } else {
  1515. if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
  1516. GELOGE(FAILED, "[Update][Offset:Input]fail for node:%s", node->GetName().c_str());
  1517. return FAILED;
  1518. }
  1519. }
  1520. node->GetOpDesc()->SetInputOffset(input_list);
  1521. return SUCCESS;
  1522. }
  1523. Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
  1524. const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  1525. GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  1526. // Parsing offset and size vectors
  1527. vector<int64_t> memory_offset_start;
  1528. vector<int64_t> memory_offset_size;
  1529. memory_offset_start.emplace_back(atomic_mem_start);
  1530. for (size_t i = 0; i < mem_offset_end.size(); ++i) {
  1531. memory_offset_start.emplace_back(mem_offset_end[i]);
  1532. // Number 1 means element index
  1533. auto size = memory_offset_start[i + 1] - memory_offset_start[i];
  1534. memory_offset_size.emplace_back(size);
  1535. }
  1536. memory_offset_start.pop_back();
  1537. const auto &in_control_anchor = node->GetInControlAnchor();
  1538. if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
  1539. for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1540. if (peer_out_control_anchor == nullptr) {
  1541. continue;
  1542. }
  1543. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1544. auto peer_out_node_desc = peer_out_node->GetOpDesc();
  1545. if (peer_out_node_desc == nullptr) {
  1546. continue;
  1547. }
  1548. GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(),
  1549. peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
  1550. if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
  1551. if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
  1552. GELOGE(FAILED, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
  1553. return FAILED;
  1554. }
  1555. }
  1556. }
  1557. }
  1558. return SUCCESS;
  1559. }
  1560. ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
  1561. const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  1562. auto node_op_desc = node->GetOpDesc();
  1563. if (node_op_desc != nullptr) {
  1564. GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
  1565. vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
  1566. vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
  1567. workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
  1568. workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
  1569. node_op_desc->SetWorkspace(workspace_vector);
  1570. node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
  1571. std::vector<int64_t> mem_start_vector;
  1572. // If GetListInt fail, mem_start_vector is empty.
  1573. (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
  1574. mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
  1575. GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
  1576. REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
  1577. ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
  1578. GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
  1579. ATTR_NAME_AUTOMIC_ADD_START.c_str(), node_op_desc->GetName().c_str());
  1580. return FAILED);
  1581. std::vector<int64_t> mem_size_vector;
  1582. // If GetListInt fail, mem_size_vector is empty.
  1583. (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
  1584. mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
  1585. GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
  1586. REPORT_INNER_ERROR("E19999", "Set Attr:%s failed, op_name:%s",
  1587. ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
  1588. GELOGE(FAILED, "[Set][Attr:%s]fail for op_name:%s",
  1589. ATTR_NAME_AUTOMIC_ADD_MEM_SIZE.c_str(), node_op_desc->GetName().c_str());
  1590. return FAILED);
  1591. std::stringstream ss;
  1592. for (auto iter : atomic_mem_start) {
  1593. ss << iter << " ";
  1594. }
  1595. string atomic_mem_start_str = ss.str();
  1596. ss.clear();
  1597. ss.str("");
  1598. for (auto iter : atomic_mem_size) {
  1599. ss << iter << " ";
  1600. }
  1601. string atomic_mem_size_str = ss.str();
  1602. GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]"
  1603. " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
  1604. node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
  1605. atomic_mem_size_str.c_str());
  1606. }
  1607. return SUCCESS;
  1608. }
  1609. void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  1610. if (mem_align_size <= 0) {
  1611. return;
  1612. }
  1613. auto iter = memory_offset_.find(memory_type);
  1614. if (iter == memory_offset_.end()) {
  1615. GELOGW("Memory offset don't have memory type[%ld].", memory_type);
  1616. return;
  1617. }
  1618. iter->second.mem_offset_ =
  1619. (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
  1620. }
  1621. ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
  1622. int64_t &memory_type) {
  1623. memory_type = RT_MEMORY_HBM;
  1624. // In the dynamic batch scenario, the memory attributes of nodes are the same.
  1625. for (auto &n : nodes) {
  1626. if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
  1627. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"),
  1628. "[Get][MemType:input]fail for node:%s", n->GetName().c_str())
  1629. break;
  1630. }
  1631. if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
  1632. GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"),
  1633. "[Get][MemType:output]fail for node:%s", n->GetName().c_str())
  1634. break;
  1635. }
  1636. }
  1637. return SUCCESS;
  1638. }
  1639. ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  1640. memory_type = RT_MEMORY_HBM;
  1641. vector<int64_t> mem_type_list;
  1642. if (input_or_output == "input") {
  1643. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  1644. }
  1645. if (input_or_output == "output") {
  1646. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  1647. }
  1648. if (mem_type_list.empty()) {
  1649. if (memory_offset_.find(memory_type) == memory_offset_.end()) {
  1650. std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
  1651. + ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
  1652. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1653. return FAILED;
  1654. }
  1655. return SUCCESS;
  1656. }
  1657. if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
  1658. std::string error = "The size" + FmtToStr(mem_type_list.size()) +
  1659. " of mem type list is not equal to the size of in data anchor" +
  1660. FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
  1661. FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
  1662. GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
  1663. return FAILED;
  1664. }
  1665. if (!CheckContinuousMemType(mem_type_list)) {
  1666. GELOGE(FAILED, "[Check][MemType:Continuous]fail for node:%s", node->GetName().c_str());
  1667. return FAILED;
  1668. }
  1669. // It is continuous memory and memory type is the same, so use the first memory.
  1670. memory_type = mem_type_list[0];
  1671. return SUCCESS;
  1672. }
  1673. bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  1674. if (mem_type_list.size() == 0) {
  1675. return true;
  1676. }
  1677. int64_t mem_type_tmp = mem_type_list[0];
  1678. for (auto mem_type : mem_type_list) {
  1679. if (mem_type != mem_type_tmp) {
  1680. REPORT_INNER_ERROR(
  1681. "E19999",
  1682. "The memory is continuous, but the type of the input memory is inconsistent. They are %s and %s",
  1683. FmtToStr(mem_type_tmp).c_str(), FmtToStr(mem_type).c_str());
  1684. GELOGW("The memory is continuous, but the type of the input memory is inconsistent. They are [%ld] and [%ld].",
  1685. mem_type_tmp, mem_type);
  1686. return false;
  1687. }
  1688. }
  1689. if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
  1690. REPORT_INNER_ERROR("E19999", "Memory offset map does not have memory type %s", FmtToStr(mem_type_tmp).c_str());
  1691. GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
  1692. return false;
  1693. }
  1694. return true;
  1695. }
  1696. void GraphMemoryAssigner::PrintMemoryOffset() {
  1697. for (auto pair : memory_offset_) {
  1698. // Assign memory of max batch nodes that have the same batch label.
  1699. GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
  1700. pair.first, pair.second.mem_offset_);
  1701. }
  1702. }
  1703. ge::Status GraphMemoryAssigner::TryGetNodeRefIndexes(const NodePtr &node, map<int32_t, int32_t> &out2ins) const{
  1704. // data and netoutput no need check because only data's output or netoutput's input is used
  1705. if (node->GetType() == DATA || node->GetType() == NETOUTPUT) {
  1706. return ge::SUCCESS;
  1707. }
  1708. for (const auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  1709. int32_t reuse_in_index = -1;
  1710. // nopadding means output[0] reuse input[0], but as history reason,
  1711. // other output index also return true for mem assign in block_mem_assigner
  1712. if (GraphUtils::IsNoPaddingRefFromInput(out_data_anchor, reuse_in_index)) {
  1713. out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
  1714. return ge::SUCCESS;
  1715. }
  1716. bool reuse_input_flag = GraphUtils::IsRefFromInput(out_data_anchor, reuse_in_index);
  1717. if (reuse_input_flag) {
  1718. if (node->GetInDataAnchor(reuse_in_index) != nullptr) {
  1719. out2ins.emplace(out_data_anchor->GetIdx(), reuse_in_index);
  1720. } else {
  1721. REPORT_INNER_ERROR("E19999", "Invalid reuse_input value %d on output %d of node %s, "
  1722. "please check attr reuse_input",
  1723. reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
  1724. GELOGE(FAILED, "[Check][Attr]Invalid reuse_input value %d on output %d of node %s, "
  1725. "please check attr reuse_input",
  1726. reuse_in_index, out_data_anchor->GetIdx(), node->GetName().c_str());
  1727. return FAILED;
  1728. }
  1729. }
  1730. }
  1731. return ge::SUCCESS;
  1732. }
  1733. bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly(
  1734. const NodePtr &input_continuous_node, map<NodePtr, uint32_t> &node_2_continuous_type) {
  1735. for (const auto &in_node : input_continuous_node->GetInDataNodes()) {
  1736. if (in_node->GetType() == VARIABLE) {
  1737. GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(),
  1738. in_node->GetName().c_str());
  1739. return true;
  1740. }
  1741. auto iter = node_2_continuous_type.find(in_node);
  1742. // In node's topo order in the front, so function can not be exception
  1743. auto continuous_type = iter->second;
  1744. bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
  1745. if (continuous_input) {
  1746. GELOGI("[Store][Node] of %s cause it's precursor node %s need assign continuous input memory",
  1747. input_continuous_node->GetName().c_str(), in_node->GetName().c_str());
  1748. return false;
  1749. }
  1750. }
  1751. for (const auto &out_node : input_continuous_node->GetOutDataNodes()) {
  1752. auto continuous_type = GetContinuousMemoryType(out_node->GetOpDesc());
  1753. node_2_continuous_type.emplace(out_node, continuous_type);
  1754. bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
  1755. if (continuous_input) {
  1756. GELOGI("[Store][Node] of %s cause it's succeed node %s need assign continuous input memory",
  1757. input_continuous_node->GetName().c_str(), out_node->GetName().c_str());
  1758. return false;
  1759. }
  1760. }
  1761. return true;
  1762. }
  1763. ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node,
  1764. uint32_t continuous_type,
  1765. bool reverse_refresh) {
  1766. int64_t mem_clean_start = 0;
  1767. int64_t mem_clean_size = 0;
  1768. int64_t memory_type = RT_MEMORY_HBM;
  1769. GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"),
  1770. "[Get][MemType]fail for node:%s", input_continuous_node->GetName().c_str());
  1771. auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type,
  1772. continuous_type, reverse_refresh);
  1773. if (ret != ge::SUCCESS) {
  1774. GELOGE(ret, "[Assign][Memory:Input:continuous]fail for node:%s", input_continuous_node->GetName().c_str());
  1775. return ret;
  1776. }
  1777. // Clean up atomic address, eg, hcom node
  1778. vector<int32_t> input_indexes;
  1779. // If GetListInt fail, input_indexes is empty.
  1780. (void)ge::AttrUtils::GetListInt(input_continuous_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
  1781. if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
  1782. // check whether there is an atomic conflict between the current node and the peer out node
  1783. if (!CheckInputIsSupportAtomic(input_continuous_node)) {
  1784. return ge::FAILED;
  1785. }
  1786. const auto &in_control_anchor = input_continuous_node->GetInControlAnchor();
  1787. GE_CHECK_NOTNULL(in_control_anchor);
  1788. for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
  1789. GE_CHECK_NOTNULL(peer_out_control_anchor);
  1790. auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
  1791. if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
  1792. ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
  1793. if (ret != SUCCESS) {
  1794. GELOGE(ret, "[Set][AtomicCleanAttr]fail for node:%s", peer_out_node->GetName().c_str());
  1795. return ret;
  1796. }
  1797. }
  1798. }
  1799. }
  1800. return ge::SUCCESS;
  1801. }
  1802. Status GraphMemoryAssigner::AssignBufferPoolMemory() {
  1803. auto is_buffer_pool_mem_enable = [] (const ComputeGraphPtr &graph) -> bool {
  1804. for (NodePtr &node : graph->GetAllNodes()) {
  1805. auto op_desc = node->GetOpDesc();
  1806. if (op_desc == nullptr) {
  1807. continue;
  1808. }
  1809. bool has_attrs = op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_ID) && op_desc->HasAttr(ATTR_NAME_BUFFER_POOL_SIZE);
  1810. if (has_attrs) {
  1811. return true;
  1812. }
  1813. }
  1814. return false;
  1815. };
  1816. auto root_graph = GraphUtils::FindRootGraph(compute_graph_);
  1817. GE_CHECK_NOTNULL(root_graph);
  1818. if (root_graph->GetGraphUnknownFlag()) {
  1819. GELOGI("[Check][Enable]Unknown root graph does not support buffer pool memory, graph:%s.",
  1820. compute_graph_->GetName().c_str());
  1821. return SUCCESS;
  1822. }
  1823. if (!is_buffer_pool_mem_enable(compute_graph_)) {
  1824. GELOGD("[Check][Enable]Buffer pool memory is not enable, graph:%s.", compute_graph_->GetName().c_str());
  1825. return SUCCESS;
  1826. }
  1827. map<int64_t, size_t> mem_type_to_offset;
  1828. for (const auto &pair : memory_offset_) {
  1829. mem_type_to_offset[pair.first] = pair.second.mem_offset_;
  1830. }
  1831. BufferPoolMemAssigner buffer_pool_mem_assigner(compute_graph_, mem_type_to_offset);
  1832. Status status = buffer_pool_mem_assigner.Assign();
  1833. if (status != SUCCESS) {
  1834. GELOGE(status, "[Assign][BufferPoolMem]Graph:%s.", compute_graph_->GetName().c_str());
  1835. REPORT_INNER_ERROR("E19999", "Failed to assign buffer pool memory, graph:%s.", compute_graph_->GetName().c_str());
  1836. return status;
  1837. }
  1838. int64_t mem_type = buffer_pool_mem_assigner.GetMemType();
  1839. auto iter = memory_offset_.find(mem_type);
  1840. if (iter == memory_offset_.end()) {
  1841. GELOGE(FAILED, "[Check][MemType]Memory type is not supported, graph:%s, mem type:%ld.",
  1842. compute_graph_->GetName().c_str(), mem_type);
  1843. REPORT_INNER_ERROR("E19999", "Memory type is not supported, graph:%s, mem type:%ld.",
  1844. compute_graph_->GetName().c_str(), mem_type);
  1845. return FAILED;
  1846. }
  1847. iter->second.mem_offset_ = buffer_pool_mem_assigner.GetMemOffset();
  1848. GELOGI("[Assign][BufferPoolMem]Assign buffer pool memory successfully, graph:%s, mem type:%ld, mem offset:%zu.",
  1849. compute_graph_->GetName().c_str(), mem_type, buffer_pool_mem_assigner.GetMemOffset());
  1850. return SUCCESS;
  1851. }
  1852. // if producer and customers in the same stream, or customers on the same stream when producer not assign a stream,
  1853. // then return false.
  1854. bool GraphMemoryAssigner::IsOutputVisitedByMultiStream(const NodePtr &peer_out_node, int64_t out_anchor_index) {
  1855. GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, return true);
  1856. int64_t unique_stream_id = peer_out_node->GetOpDesc()->GetStreamId();
  1857. GE_IF_BOOL_EXEC(peer_out_node->GetOutDataAnchor(out_anchor_index) == nullptr, return true);
  1858. for (const auto &in_data_anchor : peer_out_node->GetOutDataAnchor(out_anchor_index)->GetPeerInDataAnchors()) {
  1859. auto node = in_data_anchor->GetOwnerNode();
  1860. GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, continue);
  1861. if (node->GetOpDesc()->GetStreamId() == kInvalidStream) {
  1862. continue;
  1863. }
  1864. if (unique_stream_id == kInvalidStream) { // peer_out_node not belong to any stream
  1865. unique_stream_id = node->GetOpDesc()->GetStreamId();
  1866. continue;
  1867. }
  1868. if (node->GetOpDesc()->GetStreamId() != unique_stream_id) {
  1869. return true;
  1870. }
  1871. }
  1872. return false;
  1873. }
  1874. void GraphMemoryAssigner::UpdatePrevNodeInputDesc(const NodePtr &prev_node,
  1875. const vector<int64_t> &prev_node_input_index_vec,
  1876. int64_t distance) {
  1877. GE_IF_BOOL_EXEC(prev_node == nullptr, return);
  1878. auto prev_node_op_desc = prev_node->GetOpDesc();
  1879. GE_IF_BOOL_EXEC(prev_node_op_desc == nullptr, return);
  1880. for (const auto prev_node_input_index : prev_node_input_index_vec) {
  1881. auto input_desc = prev_node_op_desc->GetInputDesc(prev_node_input_index);
  1882. vector<int64_t> prev_next_distances;
  1883. if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
  1884. GELOGW("Get [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed",
  1885. prev_node_op_desc->GetName().c_str(),
  1886. prev_node_input_index);
  1887. continue;
  1888. }
  1889. if (prev_next_distances.size() == kPrevNextDistanceNum) {
  1890. prev_next_distances[1] = distance;
  1891. } else {
  1892. GELOGW("Size of prev_next_distances is not %d.", kPrevNextDistanceNum);
  1893. continue;
  1894. }
  1895. if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
  1896. GELOGW("Set [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1897. prev_node_op_desc->GetName().c_str(),
  1898. prev_node_input_index);
  1899. continue;
  1900. }
  1901. if (prev_node_op_desc->UpdateInputDesc(prev_node_input_index, input_desc) != GRAPH_SUCCESS) {
  1902. GELOGW("Update [%s] input [%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1903. prev_node_op_desc->GetName().c_str(),
  1904. prev_node_input_index);
  1905. continue;
  1906. }
  1907. GELOGD("Set the next distance[%ld] to node[%s], input index[%ld]",
  1908. distance,
  1909. prev_node->GetName().c_str(),
  1910. prev_node_input_index);
  1911. }
  1912. return;
  1913. }
  1914. void GraphMemoryAssigner::UpdateCurNodeInputDesc(const NodePtr &cur_node,
  1915. int64_t cur_node_input_index,
  1916. int64_t distance) {
  1917. GE_IF_BOOL_EXEC(cur_node == nullptr, return);
  1918. GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, return);
  1919. auto input_desc = cur_node->GetOpDesc()->GetInputDesc(cur_node_input_index);
  1920. vector<int64_t> prev_next_distances{distance, -1};
  1921. if (!ge::AttrUtils::SetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
  1922. GELOGW("Set [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1923. cur_node->GetOpDesc()->GetName().c_str(),
  1924. cur_node_input_index);
  1925. return;
  1926. }
  1927. if (cur_node->GetOpDesc()->UpdateInputDesc(cur_node_input_index, input_desc) != GRAPH_SUCCESS) {
  1928. GELOGW("Update [%s] input[%ld] ATTR_NAME_DATA_VISIT_DISTANCE failed.",
  1929. cur_node->GetOpDesc()->GetName().c_str(),
  1930. cur_node_input_index);
  1931. return;
  1932. }
  1933. GELOGD("Set the prev distance[%ld] to node[%s], input index[%ld]",
  1934. distance,
  1935. cur_node->GetName().c_str(),
  1936. cur_node_input_index);
  1937. return;
  1938. }
  1939. void GraphMemoryAssigner::CheckNeedCalcDistAndUpdateVisitInfo(
  1940. const NodePtr &peer_out_node,
  1941. const OutDataAnchorPtr &peer_out_anchor,
  1942. size_t matched_mem_offset,
  1943. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
  1944. bool &is_need_calc_distance) {
  1945. auto iter = mem_block_visit_info.find(matched_mem_offset);
  1946. // cannot find visit info, peer_out_node must be a producer and this data is the first time to be visited.
  1947. if (iter == mem_block_visit_info.end()) {
  1948. if (IsOutputVisitedByMultiStream(peer_out_node, peer_out_anchor->GetIdx())) {
  1949. vector<int64_t> temp;
  1950. mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(nullptr, temp)));
  1951. is_need_calc_distance = false;
  1952. return;
  1953. } else {
  1954. vector<int64_t> temp = {-1};
  1955. // producer's prev_node_index set to -1 as default
  1956. mem_block_visit_info.insert(std::make_pair(matched_mem_offset, std::make_pair(peer_out_node, temp)));
  1957. is_need_calc_distance = true;
  1958. return;
  1959. }
  1960. } else {
  1961. if (mem_block_visit_info[matched_mem_offset].first == nullptr) {
  1962. // multi-stream visit, no need to calculate
  1963. is_need_calc_distance = false;
  1964. return;
  1965. }
  1966. if (peer_out_node->GetOpDesc()->GetStreamId() !=
  1967. mem_block_visit_info[matched_mem_offset].first->GetOpDesc()->GetStreamId()) {
  1968. // cur node and peer_out_node not in the same stream, no need to calculate
  1969. is_need_calc_distance = false;
  1970. return;
  1971. }
  1972. }
  1973. is_need_calc_distance = true;
  1974. return;
  1975. }
  1976. // calculate distance, update visit info, update prev_node input desc, update cur node input desc
  1977. void GraphMemoryAssigner::CalcDistanceAndUpdateDesc(const map<string, int64_t> &node_index_in_stream,
  1978. const InDataAnchorPtr &in_data_anchor,
  1979. size_t matched_mem_offset,
  1980. NodePtr &node,
  1981. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
  1982. bool &is_need_skip) {
  1983. int64_t distance = -1;
  1984. auto prev_node = mem_block_visit_info[matched_mem_offset].first;
  1985. auto prev_node_input_index_vec = mem_block_visit_info[matched_mem_offset].second;
  1986. GE_IF_BOOL_EXEC(prev_node == nullptr, is_need_skip = true; return);
  1987. if (prev_node_input_index_vec.size() == 1 && prev_node_input_index_vec[0] == -1) {
  1988. // prev_node is producer and the data is just be produced(not visited by other node)
  1989. GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
  1990. if (prev_node->GetOpDesc()->GetStreamId() == -1) { // producer not assigned a stream
  1991. distance = 0;
  1992. } else {
  1993. auto iter = node_index_in_stream.find(prev_node->GetName());
  1994. if (iter == node_index_in_stream.end()) {
  1995. distance = 0;
  1996. } else {
  1997. distance = node_index_in_stream.at(node->GetName()) - iter->second - 1;
  1998. }
  1999. }
  2000. mem_block_visit_info[matched_mem_offset].first = node;
  2001. mem_block_visit_info[matched_mem_offset].second.clear();
  2002. mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
  2003. } else { // the data is visit by other customer just before.
  2004. if (prev_node_input_index_vec.empty()) {
  2005. GELOGW("Missing prev node[%s] input index.", prev_node->GetName().c_str());
  2006. is_need_skip = true;
  2007. return;
  2008. }
  2009. if (prev_node == node) { // scene: multiple anchors of a node access the same data
  2010. vector<int64_t> prev_next_distances;
  2011. GE_IF_BOOL_EXEC(prev_node->GetOpDesc() == nullptr, is_need_skip = true; return);
  2012. auto input_desc = prev_node->GetOpDesc()->GetInputDesc(prev_node_input_index_vec[0]);
  2013. if (!ge::AttrUtils::GetListInt(input_desc, ATTR_NAME_DATA_VISIT_DISTANCE, prev_next_distances)) {
  2014. GELOGW("Get ATTR_NAME_DATA_VISIT_DISTANCE failed.");
  2015. is_need_skip = true;
  2016. return;
  2017. }
  2018. if (prev_next_distances.size() != kPrevNextDistanceNum) {
  2019. GELOGW("Size of prev_next_distance is not %d.", kPrevNextDistanceNum);
  2020. is_need_skip = true;
  2021. return;
  2022. } else {
  2023. distance = prev_next_distances[0]; // use the same prev_distance as previous anchor
  2024. }
  2025. mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
  2026. } else {
  2027. distance = node_index_in_stream.at(node->GetName()) - node_index_in_stream.at(prev_node->GetName()) - 1;
  2028. UpdatePrevNodeInputDesc(prev_node, prev_node_input_index_vec, distance);
  2029. mem_block_visit_info[matched_mem_offset].first = node;
  2030. mem_block_visit_info[matched_mem_offset].second.clear();
  2031. mem_block_visit_info[matched_mem_offset].second.push_back(in_data_anchor->GetIdx());
  2032. }
  2033. }
  2034. UpdateCurNodeInputDesc(node, in_data_anchor->GetIdx(), distance);
  2035. }
  2036. void GraphMemoryAssigner::DeleteVisitInfoWhenLifecycleEnded(
  2037. const NodePtr &node,
  2038. const InDataAnchorPtr &in_data_anchor,
  2039. size_t matched_mem_offset,
  2040. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info) {
  2041. GE_IF_BOOL_EXEC(node->GetOpDesc() == nullptr, return);
  2042. auto input_desc = node->GetOpDesc()->GetInputDesc(in_data_anchor->GetIdx());
  2043. bool is_end_of_inputmem_lifecycle = false;
  2044. // if is_end_of_inputmem_lifecycle is true, indicating that cur node is the last customer of this data,
  2045. // then we need to delete the visit info of the block in case that the memblock be reused and visited.
  2046. if (ge::AttrUtils::GetBool(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, is_end_of_inputmem_lifecycle) &&
  2047. is_end_of_inputmem_lifecycle) {
  2048. GELOGD("ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE is true, node name is [%s], in_data_anchor index is [%d]",
  2049. node->GetName().c_str(),
  2050. in_data_anchor->GetIdx());
  2051. auto iter = mem_block_visit_info.find(matched_mem_offset);
  2052. if (iter != mem_block_visit_info.end()) {
  2053. mem_block_visit_info.erase(iter);
  2054. }
  2055. }
  2056. }
  2057. void GraphMemoryAssigner::MarkNodeDistanceAttr(const ComputeGraphPtr &compute_graph,
  2058. NodePtr &node,
  2059. map<size_t, pair<NodePtr, vector<int64_t>>> &mem_block_visit_info,
  2060. const map<string, int64_t> &node_index_in_stream) {
  2061. GELOGD("Begin to mark node distance attr, node name is [%s]", node->GetName().c_str());
  2062. GE_IF_BOOL_EXEC(node == nullptr, return);
  2063. for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
  2064. auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
  2065. GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
  2066. auto peer_out_node = peer_out_anchor->GetOwnerNode();
  2067. GE_IF_BOOL_EXEC(peer_out_node == nullptr, continue);
  2068. GE_IF_BOOL_EXEC(peer_out_node->GetOpDesc() == nullptr, continue);
  2069. auto matched_mem_offset = peer_out_node->GetOpDesc()->GetOutputOffset().at(peer_out_anchor->GetIdx());
  2070. bool is_need_calc_distance = false;
  2071. CheckNeedCalcDistAndUpdateVisitInfo(peer_out_node, peer_out_anchor, matched_mem_offset,
  2072. mem_block_visit_info, is_need_calc_distance);
  2073. if (!is_need_calc_distance) {
  2074. continue;
  2075. }
  2076. bool is_need_skip = false;
  2077. CalcDistanceAndUpdateDesc(node_index_in_stream, in_data_anchor, matched_mem_offset, node,
  2078. mem_block_visit_info, is_need_skip);
  2079. if (is_need_skip) {
  2080. continue;
  2081. }
  2082. DeleteVisitInfoWhenLifecycleEnded(node, in_data_anchor, matched_mem_offset, mem_block_visit_info);
  2083. }
  2084. }
  2085. void GraphMemoryAssigner::MarkDistanceAttr() {
  2086. // key: mem_offset of the memory which we visited. value: node we visited and input index of this node
  2087. map<size_t, pair<NodePtr, vector<int64_t>>> mem_block_visit_info;
  2088. // key: node name, value: topo order of node in it's belonged stream(exclude ge_local_op)
  2089. map<string, int64_t> node_index_in_stream;
  2090. // key: stream id, value: cur nodes num in that stream
  2091. map<int64_t, int64_t> stream_nodes_num;
  2092. for (auto &node : compute_graph_->GetAllNodes()) {
  2093. auto node_op_desc = node->GetOpDesc();
  2094. GE_IF_BOOL_EXEC(node_op_desc == nullptr, return);
  2095. int64_t stream_id = node_op_desc->GetStreamId();
  2096. if (node_op_desc->GetOpKernelLibName() != kEngineNameGeLocal) {
  2097. if (stream_nodes_num.find(stream_id) == stream_nodes_num.end()) {
  2098. stream_nodes_num.insert(std::make_pair(stream_id, 1));
  2099. } else {
  2100. ++stream_nodes_num[stream_id];
  2101. }
  2102. node_index_in_stream.insert(std::make_pair(node->GetName(), stream_nodes_num[stream_id] - 1));
  2103. MarkNodeDistanceAttr(compute_graph_, node, mem_block_visit_info, node_index_in_stream);
  2104. } else {
  2105. GELOGD("node[%s] is ge_local_op, no need to calculate distance.", node->GetName().c_str());
  2106. }
  2107. }
  2108. }
  2109. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。