graph_mem_assigner.cc
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"
#include <cstring>
#include <set>
#include "common/math/math_util.h"
#include "common/util/error_manager/error_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
namespace {
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
// Each state occupies its own bit, so the values must not overlap.
enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 };
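
// For example, a node with continuous input and no-padding continuous output
// carries continuous_type = kTypeInput | kTypeOutputNoPadding = 1 | 8 = 9, and
// each capability can be tested independently with a bitwise AND (see
// GetContinuousMemoryType below).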
int64_t GetSymbolOutputOffset(const std::map<std::string, std::string> &anchor_to_symbol,
                              const std::map<std::string, std::list<ge::NodeIndexIO>> &symbol_to_anchors,
                              const ge::NodePtr &node, const uint32_t i) {
  ge::NodeIndexIO cur_node_index_io(node, i, ge::kOut);
  auto iter1 = anchor_to_symbol.find(cur_node_index_io.ToString());
  if (iter1 == anchor_to_symbol.end()) {
    return ge::kInvalidOffset;
  }
  auto out_symbol = iter1->second;
  auto iter2 = symbol_to_anchors.find(out_symbol);
  if (iter2 == symbol_to_anchors.end()) {
    return ge::kInvalidOffset;
  }
  for (const auto &node_index_io : iter2->second) {
    if (node_index_io.value_ == out_symbol) {
      vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
      vector<int64_t> symbol_output_list = node_index_io.node_->GetOpDesc()->GetOutputOffset();
      if (node_index_io.index_ >= symbol_output_list.size()) {
        return ge::kInvalidOffset;
      }
      GELOGD("Node %s %uth output offset is %ld, Symbol %s output offset is %ld.", node->GetName().c_str(), i,
             output_list[i], iter2->first.c_str(), symbol_output_list.at(node_index_io.index_));
      return symbol_output_list.at(node_index_io.index_);
    }
  }
  return ge::kInvalidOffset;
}
}  // namespace
namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}
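
// AssignMemory drives the top-level flow: run the HybridMemAssigner over the
// whole graph to lay out feature-map memory, record the resulting HBM (and,
// if present, P2P DDR) offsets in memory_offset_, then assign variable and
// constant memory on top via VariableMemoryAssigner.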
Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new(std::nothrow) HybridMemAssigner(compute_graph_));
  // new(std::nothrow) may return nullptr, so check before the first use.
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc HybridMemAssigner failed.");
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "Memory assigner failed");
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
  if (mem_assigner->GetP2PMemOffset() >= 0) {
    MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
    memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
  }
  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}
ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new(std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}
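
// CalculateTensorRealSizeAndOutSize serves the no-padding reuse case: it
// multiplies the dimensions in front of dim_index into batch_dim_num and sets
// them to 1, so output_mem_size becomes the size of one piece of data while
// out_size keeps the full tensor size. E.g. for shape [4, 2, 16] with
// dim_index = 1, batch_dim_num = 4 and the piece shape is [1, 2, 16].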
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                             int64_t dim_index, int64_t &output_mem_size,
                                             int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Opdesc GetSize failed!");
    return FAILED;
  }
  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    std::string error = "Invalid value" + FmtToStr(dim_index) +
        " of attr _reuse_input_on_dim_index, which is out of data range [0," +
        std::to_string(output_dims.size()) + ")";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }
  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();
  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
    return FAILED;
  }
  if (output_mem_size < 0) {
    std::string error = "After calculating tensor memory size, output_mem_size" + FmtToStr(output_mem_size) +
        " is out of data range [0," + std::to_string(INT64_MAX) + "]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map<int64_t, size_t> &mem_type_to_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }
  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");
  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");
  size_t total_mem_offset = 0;
  for (auto pair : memory_offset_) {
    mem_type_to_offset[pair.first] = pair.second.mem_offset_;
    total_mem_offset += pair.second.mem_offset_;
  }
  auto session_id = compute_graph_->GetSessionID();
  if (total_mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", total_mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    for (auto iter : mem_type_to_offset) {
      ErrorManager::GetInstance().ATCReportErrMessage("E19022", {"memType", "size", "item", "maxsize"},
                                                      {std::to_string(iter.first), std::to_string(iter.second),
                                                       "featuremap",
                                                       std::to_string(VarManager::Instance(session_id)->GetGraphMemoryMaxSize())});
      GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
              iter.second, iter.first);
    }
    return ge::FAILED;
  }
  return SUCCESS;
}
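
// Zero-copy blocks (is_zero_copy_) hold data exchanged directly with the
// caller; they are appended after the feature-map region assigned above, and
// zero_mem_copy_size reports how much the HBM offset grew as a result.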
Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);
  size_t mem_offset_tmp = mem_offset[RT_MEMORY_HBM];
  // set offset for zero copy block
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset[RT_MEMORY_HBM]);
    mem_offset[RT_MEMORY_HBM] += memory_block->Size();
    memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
  }
  // set offset for zero copy nodes
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset[RT_MEMORY_HBM] - mem_offset_tmp;
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type[HBM]";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];
  GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
         zero_mem_copy_size);
  return SUCCESS;
}
uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) {
  if (op_desc == nullptr) {
    return 0;
  }
  bool is_continuous = false;
  uint32_t continuous_type = 0;
  // If GetBool fail, is_continuous is false.
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeInput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeInputNoPadding;
      }
    }
  }
  is_continuous = false;
  (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous);
  if (is_continuous) {
    continuous_type |= kTypeOutput;
  } else {
    (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous);
    if (is_continuous) {
      bool attr_reuse = false;
      (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
      if (attr_reuse) {
        continuous_type |= kTypeOutputNoPadding;
      }
    }
  }
  if (continuous_type != 0) {
    GELOGI("Current node %s continuous type %u.", op_desc->GetName().c_str(), continuous_type);
  }
  return continuous_type;
}
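
// GetMemorySize reports two sizes for one output: tensor_size is the padded
// size recorded in the tensor descriptor, while nopadding_size is the real
// per-piece size derived from the shape when no-padding reuse is in effect.
// Callers choose between them based on continuous_type.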
Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type,
                     int64_t &tensor_size, int64_t &nopadding_size) {
  if ((op_desc == nullptr) || (output_desc == nullptr)) {
    GELOGE(FAILED, "Input para is nullptr.");
    return FAILED;
  }
  tensor_size = 0;
  nopadding_size = 0;
  bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    int64_t attr_dim_index;
    bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
    if (!get_attr_dim_flag) {
      GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
      return FAILED;
    }
    // Calculate tensor real size of each piece of data and out size of complete data
    int64_t batch_dim_num = 1;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str());
      return FAILED;
    }
  } else {
    if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
  }
  if ((tensor_size < 0) || (nopadding_size < 0)) {
    GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
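
// Round mem_align_size up to the next multiple of MEM_ALIGN_SIZE (512 bytes
// here, matching the hcom head/tail reservation below). For example, 600
// becomes (600 + 511) / 512 * 512 = 1024; exact multiples are left unchanged.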
void AlignMemOffset(int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
}
bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) {
  bool is_peer_output_continuous = false;
  // If GetBool fail, is_peer_output_continuous is false.
  (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);
  // Get peer node output size. If size == 1 (the peer node has only one output), the continuous input of this
  // node and the continuous output of the previous node are the same, and we can support it. If size != 1,
  // there may be a conflict between the two, so we cannot support it.
  auto peer_output_size = peer_op_desc->GetOutputsSize();
  GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " requires continuous output. There may be a conflict between the two. " +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  bool is_peer_reference = false;
  // If GetBool fail, is_peer_reference is false.
  (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
  GE_IF_BOOL_EXEC(is_peer_reference,
                  std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) +
                      " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) +
                      " is a reference op. There may be a conflict between the two. " +
                      "This node is not supported now.";
                  GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                  return true;);
  return false;
}
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node);
    auto continuous_type = GetContinuousMemoryType(node->GetOpDesc());
    // Assign continuous input memory
    bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0);
    int64_t memory_type = RT_MEMORY_HBM;
    if (continuous_input) {
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed.");
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }
      // Clean up atomic address, e.g. hcom node
      vector<int32_t> input_indexes;
      // If GetListInt fail, input_indexes is empty.
      (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);
      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // check whether there is an atomic conflict between the current node and the peer out node
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        }
        const auto &in_control_anchor = node->GetInControlAnchor();
        GE_CHECK_NOTNULL(in_control_anchor);
        for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
          GE_CHECK_NOTNULL(peer_out_control_anchor);
          auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
          if (peer_out_node->GetType() == ATOMICADDRCLEAN) {
            ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type);
            if (ret != SUCCESS) {
              GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str());
              return ret;
            }
          }
        }
      }
    }
    // Assign continuous output memory
    bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0);
    if (continuous_output) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed.");
      ret = AssignContinuousOutputMemory(node, memory_type, continuous_type);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous output memory failed!");
        return ret;
      }
    }
  }
  for (auto pair : memory_offset_) {
    GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
           pair.second.mem_offset_);
  }
  return ge::SUCCESS;
}
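
// Memory picture for a continuous-input node: a MEM_ALIGN_SIZE guard is
// reserved before the first input, each peer output is then placed back to
// back (aligned unless no-padding reuse applies), and extra_memory_size
// accounts for the trailing guard or the padding saved by reuse. If the
// inputs were already allocated (ATTR_NAME_CONTINUOUS_INPUT_ALLOC), offsets
// are read rather than written and the head guard is rolled back at the end.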
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size, int64_t memory_type,
                                                        uint32_t continuous_type) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  // Reserve MEM_ALIGN_SIZE (512 bytes) at both the head and the tail of hcom continuous input
  iter->second.mem_offset_ += MEM_ALIGN_SIZE;
  continuous_mem_start = iter->second.mem_offset_;
  int64_t mem_offset = iter->second.mem_offset_;
  int64_t extra_memory_size = 0;
  bool is_continuous_input_allocated = false;
  (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue);
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;);
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    int64_t real_size = 0;
    std::vector<int64_t> offsets_of_fusion = {};
    bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion);
    lx_fusion = lx_fusion && !offsets_of_fusion.empty();
    if (lx_fusion) {
      if (peer_out_data_anchor->GetIdx() >= static_cast<int>(offsets_of_fusion.size())) {
        std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) +
            " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()];
      tensor_desc_size = nopadding_size;
    } else {
      if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()),
                        continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) {
        return FAILED;
      }
    }
    bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion;
    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_list.size())) {
      std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range.";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    // When continuous input has already been allocated, the first input marks the beginning offset
    bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0);
    if (is_allocated_first_input) {
      mem_offset = output_list.at(peer_out_data_anchor->GetIdx());
      continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
    } else {
      // set offset for input
      output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset;
      peer_op_desc->SetOutputOffset(output_list);
    }
    int64_t align_size = tensor_desc_size;
    if (is_nopadding) {
      mem_offset += nopadding_size;
      extra_memory_size += (tensor_desc_size - nopadding_size);
      real_size = nopadding_size;
    } else {
      ge::AlignMemOffset(align_size);
      mem_offset += align_size;
      // Reserve MEM_ALIGN_SIZE (512 bytes) at both the head and the tail of hcom continuous input
      extra_memory_size = MEM_ALIGN_SIZE;
      real_size = tensor_desc_size;
    }
    GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           node->GetType().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(),
           output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type,
           is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding);
  }
  mem_offset += extra_memory_size;
  ge::AlignMemOffset(mem_offset);
  continuous_mem_size = mem_offset - continuous_mem_start;
  if (is_continuous_input_allocated) {
    // Memory is not allocated here, so the 512-byte head reserved above is rolled back
    iter->second.mem_offset_ -= MEM_ALIGN_SIZE;
  } else {
    iter->second.mem_offset_ = mem_offset;
  }
  return SUCCESS;
}
Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) {
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  if (in_data_anchor_list.empty()) {
    GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str());
    return FAILED;
  }
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null.");
                  return ge::FAILED);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx());
  return SUCCESS;
}
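
// Continuous output has three cases: no-padding outputs start at the first
// input's offset (the memory is reused), reference outputs are skipped
// because they inherit the matching input's address by name, and ordinary
// continuous outputs are packed starting from the offset already assigned to
// output 0.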
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type,
                                                         uint32_t continuous_type) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }
  int64_t mem_offset = 0;
  bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0);
  if (is_nopadding) {
    // Output tensor memory must reuse input tensor memory
    if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) {
      return ge::FAILED;
    }
  } else {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    // If the output is ref type and refers to the ref of an input, the name of the output
    // and the input are the same. When GE encounters a ref type, it finds the matching relationship according
    // to the names of input and output, and allocates the same memory address, e.g. HCOMBroadcast
    if (is_ref) {
      GELOGI("Current node %s does not need continuous output assignment because its output references an input by "
             "name.", node->GetName().c_str());
      return SUCCESS;
    }
    mem_offset = output_list[0];
  }
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    int64_t nopadding_size = 0;
    if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type,
                      tensor_desc_size, nopadding_size) != ge::SUCCESS) {
      return FAILED;
    }
    if (is_nopadding) {
      mem_offset += nopadding_size;
    } else {
      mem_offset += tensor_desc_size;
      ge::AlignMemOffset(mem_offset);
    }
    GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]"
           " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(),
           node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL,
           is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
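
// Atomic memory is assigned per batch label: branches of a dynamic-batch
// graph do not execute together, so every batch restarts from the same start
// offset and the final offset is the maximum across batches. Atomic nodes
// whose outputs feed NETOUTPUT are handled in a second pass with independent
// atomic-clean attributes.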
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  // key: batch label of dynamic batch
  map<string, map<NodePtr, vector<NodePtr>>> normal_atomic_and_clean_nodes_map;
  map<string, vector<NodePtr>> connecting_output_atomic_nodes;
  Status status = FilterAtomicNodesForMemoryAssign(normal_atomic_and_clean_nodes_map, connecting_output_atomic_nodes);
  if (status != SUCCESS) {
    GELOGE(status, "Failed to filter atomic nodes for memory assignment.");
    return status;
  }
  auto mem_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  int64_t batch_atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
  int64_t batch_max_mem_offset = batch_atomic_mem_start;
  for (auto &iter_batch : normal_atomic_and_clean_nodes_map) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    for (auto &iter : iter_batch.second) {
      int64_t atomic_mem_start = static_cast<int64_t>(mem_iter->second.mem_offset_);
      GELOGD("Begin to reAssign atomic memory, atomic address memory start = %ld", atomic_mem_start);
      for (auto &atomic_node : iter.second) {
        vector<int64_t> mem_offset_end;
        status = AssignAtomicOutputAndWorkspaceMemory(atomic_node, mem_offset_end);
        if (status != SUCCESS) {
          GELOGE(status, "Assign atomic output and workspace memory failed, node name is %s.",
                 atomic_node->GetName().c_str());
          return status;
        }
      }
      int64_t atomic_mem_size = static_cast<int64_t>(mem_iter->second.mem_offset_) - atomic_mem_start;
      if (atomic_mem_size != 0) {
        GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM),
                          "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str());
      }
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  batch_atomic_mem_start = batch_max_mem_offset;
  for (auto &iter_batch : connecting_output_atomic_nodes) {
    mem_iter->second.mem_offset_ = batch_atomic_mem_start;
    if (AssignConnectNetOutputAtomicMemory(iter_batch.second) != SUCCESS) {
      GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
      return FAILED;
    }
    batch_max_mem_offset = std::max(batch_max_mem_offset, static_cast<int64_t>(mem_iter->second.mem_offset_));
  }
  mem_iter->second.mem_offset_ = static_cast<size_t>(batch_max_mem_offset);
  return SUCCESS;
}
Status GraphMemoryAssigner::FilterAtomicNodesForMemoryAssign(
    map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map,
    map<string, vector<NodePtr>> &connecting_output_atomic_nodes) {
  GE_CHECK_NOTNULL(compute_graph_);
  for (const auto &node : compute_graph_->GetAllNodes()) {
    if (node->GetType() == ATOMICADDRCLEAN) {
      map<string, vector<NodePtr>> tmp_normal_atomic_nodes;
      const auto &out_control_anchor = node->GetOutControlAnchor();
      GE_CHECK_NOTNULL(out_control_anchor);
      for (const auto &peer_in_control_anchor : out_control_anchor->GetPeerInControlAnchors()) {
        if (peer_in_control_anchor != nullptr) {
          auto peer_in_node = peer_in_control_anchor->GetOwnerNode();
          auto peer_in_node_desc = peer_in_node->GetOpDesc();
          if (peer_in_node_desc != nullptr) {
            bool is_atomic_node = false;
            // If GetBool fail, is_atomic_node is false.
            (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic_node);
            if (is_atomic_node) {
              bool is_reference = false;
              // If GetBool fail, is_reference is false.
              (void) ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_REFERENCE, is_reference);
              if (is_reference) {
                std::string error = "Op" + FmtToStr(peer_in_node_desc->GetName()) +
                    " cannot have both atomic and is_reference attribute.";
                GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
                return ge::PARAM_INVALID;
              }
              std::string batch_label;
              (void)ge::AttrUtils::GetStr(peer_in_node_desc, ATTR_NAME_BATCH_LABEL, batch_label);
              vector<int> is_connecting_output;
              // If GetListInt fail, attr is_connecting_output is an empty vector.
              (void) ge::AttrUtils::GetListInt(peer_in_node_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connecting_output);
              if (is_connecting_output.empty()) {
                tmp_normal_atomic_nodes[batch_label].emplace_back(peer_in_node);
                continue;
              }
              connecting_output_atomic_nodes[batch_label].emplace_back(peer_in_node);
              tmp_normal_atomic_nodes[batch_label].clear();
              break;
            }
          }
        }
      }
      for (auto &it_atomic_node : tmp_normal_atomic_nodes) {
        if (!it_atomic_node.second.empty()) {
          normal_atomic_nodes_map[it_atomic_node.first][node] = it_atomic_node.second;
        }
      }
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(node_op_desc);
  // Assign atomic node output memory
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
    return ret;
  }
  // Check and assign atomic node workspace memory
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fail, is_fusion_node is false.
    (void) ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);
    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign single ordinary atomic node workspace memory, fusion nodes not included
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
      return ret;
    }
  } else {
    GELOGW("Current atomic node %s does not have attr ATOMIC_WORKSPACE_INFO.", node->GetName().c_str());
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }
    // Atomic memory start addr
    int64_t original_atomic_mem_start = static_cast<int64_t>(iter->second.mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }
    // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) {
      GELOGE(FAILED, "Failed to set atomic attr separately.");
      return FAILED;
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false
    bool is_ref = false;
    // If GetBool fail, is_ref is false.
    (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }
    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());
    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();
    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }
    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }
    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}
bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s peer out node" +
          FmtToStr(peer_op_desc->GetName()) + " is invalid; atomic input does not support Constant/AippData/Variable";
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return false;
    }
  }
  return true;
}
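
// Each output listed in ATOMIC_ATTR_OUTPUT_INDEX is placed at the current HBM
// offset in turn, and mem_offset_end records the end offset after each one so
// the atomic-clean node knows the exact ranges to zero. Outputs whose
// consumers declared all-inputs-atomic were already placed by
// ReAssignContinuousMemory and are skipped.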
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> atomic_output_index;
  // If GetListInt fail, atomic_output_index is empty.
  (void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
  // Check atomic output
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) +
        "'s size of atomic_output_index is more than the size of output_list";
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  auto iter = memory_offset_.find(RT_MEMORY_HBM);
  if (iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
          " is more than the size" + FmtToStr(output_list_size) + " of output_list.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }
    // If an atomic address has already been assigned, skip it; there is no need to reassign it.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }
    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }
    output_list[output_index] = iter->second.mem_offset_;
    std::string batch_label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
    GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%ld] "
           "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(),
           node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM,
           size, size, batch_label.c_str());
    iter->second.mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM);
    mem_offset_end.emplace_back(iter->second.mem_offset_);
  }
  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}
Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    std::string error = "Op" + FmtToStr(node->GetName()) + "'s output index" + FmtToStr(output_index) +
        " is more than the size of node's AllOutDataAnchors.";
    GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    /// Get input atomic attr of peer output op, if atomic_input_index[0] = -1, indicates that the atomic address
    /// has been assigned
    vector<int64_t> atomic_input_index;
    (void) ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      std::string error = "The node name" + FmtToStr(op_desc->GetName()) +
          " and the node name" + FmtToStr(iter->first) + " in workspace info are inconsistent.";
      GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        std::string error = "The workspace index" + FmtToStr(workspace_index) +
            " is more than the size" + FmtToStr(workspace_vector.size()) + " of workspace vector.";
        GE_ERRORLOG_AND_ERRORMSG(ge::PARAM_INVALID, error.c_str());
        return ge::PARAM_INVALID;
      }
      workspace_vector[workspace_index] = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%ld] size[%ld] real_size[%ld] batch[%s].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index,
          mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size,
          batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
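
// A fusion node carries workspace info for several original sub-nodes, so the
// offsets cannot go back into a single workspace vector; instead a per-sub-node
// {workspace index -> offset} map is attached as the ext attr
// EXT_ATTR_ATOMIC_WORKSPACE_OFFSET for downstream consumers.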
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  auto mem_type_iter = memory_offset_.find(RT_MEMORY_HBM);
  if (mem_type_iter == memory_offset_.end()) {
    std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      size_t workspace_offset = mem_type_iter->second.mem_offset_;
      std::string batch_label;
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label);
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "memtype[%ld] size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(),
          op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_,
          op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str());
      mem_type_iter->second.mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
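
// CheckOffset verifies that no input, output, or workspace offset is still
// kInvalidOffset after assignment. For IDENTITY/READVARIABLEOP nodes it also
// syncs the output offset with the ref-mapping symbol it belongs to, keeping
// pass-through nodes consistent with the tensor they alias.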
Status GraphMemoryAssigner::CheckOffset() {
  std::map<std::string, std::string> anchor_to_symbol;
  std::map<std::string, std::list<NodeIndexIO>> symbol_to_anchors;
  if (GraphUtils::GetRefMapping(compute_graph_, symbol_to_anchors, anchor_to_symbol) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Get ref-mapping for graph %s failed.", compute_graph_->GetName().c_str());
    return FAILED;
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        std::string error = "Invalid input offset" + FmtToStr(ge::kInvalidOffset) +
                            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
    bool need_update_output = false;
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (uint32_t i = 0; i < output_list.size(); ++i) {
      if (output_list[i] == ge::kInvalidOffset) {
        std::string error = "Invalid output offset" + FmtToStr(ge::kInvalidOffset) +
                            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
      if (node->GetType() == IDENTITY || node->GetType() == READVARIABLEOP) {
        auto symbol_offset = GetSymbolOutputOffset(anchor_to_symbol, symbol_to_anchors, node, i);
        if (symbol_offset != ge::kInvalidOffset && output_list[i] != symbol_offset) {
          output_list[i] = symbol_offset;
          need_update_output = true;
        }
      }
    }
    if (need_update_output) {
      node->GetOpDesc()->SetOutputOffset(output_list);
    }
    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        std::string error = "Invalid workspace offset" + FmtToStr(ge::kInvalidOffset) +
                            " in node" + FmtToStr(node->GetName());
        GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
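// Logs the final watermark of every memory type and then refreshes the input offsets of all nodes
// from the output offsets of their peers.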
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return FAILED;
  }
  for (auto pair : memory_offset_) {
    GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(),
            pair.second.mem_offset_, pair.first);
  }
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "Update op input offset failed.");
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
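// For a known-shape subgraph Data node (one carrying ATTR_NAME_PARENT_NODE_INDEX and not dynamic
// shape), returns the corresponding parent input node; otherwise returns the node itself.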
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}
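// Propagates the real input offset onto a subgraph Data node: a constant parent input contributes
// its output offset directly, while a known-shape subgraph inherits the parent node's input offset
// at parent_index. The Data node's output offset is updated to match in both cases.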
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }
  // Subgraph Data node: check for a constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;                                  // Constant input.
  }
  // Memory is allocated separately for dynamic shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }
  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    std::string error = "Get parent input offset failed, node is " + FmtToStr(node->GetName()) +
                        ", input_size is " + FmtToStr(parent_inputs.size()) + ", parent index is " +
                        FmtToStr(parent_index);
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
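// Derives the input offsets of a node from the output offsets of its peers. For fusion ops that
// carry ATTR_NAME_INPUT_MEM_TYPE_LIST, L1 inputs keep their original offset and the remaining
// inputs use original offset + peer output offset; constant inputs are resolved through
// TensorUtils::GetDataOffset instead.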
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  int64_t valid_input_index = 0;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }
    // If the current node is not a broadcast node, the OutputOffset of the previous node is used to
    // update input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    auto out_index = static_cast<unsigned long>(peer_out_anchor->GetIdx());
    if (output_list.size() > static_cast<size_t>(out_index)) {
      int64_t input_offset = output_list.at(out_index);
      if (has_mem_type_attr && !origin_input_list.empty()) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          std::string error = "fusion: node" + FmtToStr(tmp_op_desc->GetName()) +
                              " input_size" + FmtToStr(input_size) + " diff from memory_type_size" +
                              FmtToStr(mem_type_size) + " from ori_input_offset_list_size" +
                              FmtToStr(ori_input_offset_list_size);
          GE_ERRORLOG_AND_ERRORMSG(ge::FAILED, error.c_str());
          return ge::FAILED;
        }
        // L1 memory keeps the original input offset;
        // for HBM, input offset = original input offset + peer output offset.
        input_offset = (memory_type[valid_input_index] == RT_MEMORY_L1
                            ? origin_input_list[valid_input_index]
                            : origin_input_list[valid_input_index] + output_list.at(out_index));
      }
      const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
      if (in_node->GetType() == CONSTANT) {
        GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(static_cast<uint32_t>(anchor->GetIdx()));
        GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
      }
      GELOGD("%s node[%s] input[%ld] is set from node[%s] out index[%lu] offset[%ld]",
             has_mem_type_attr ? "Fusion" : "",
             tmp_op_desc->GetName().c_str(),
             valid_input_index,
             peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(),
             out_index,
             input_offset);
      input_list.emplace_back(input_offset);
      valid_input_index++;
    }
  }
  return ge::SUCCESS;
}
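// Per-type dispatch for the input offset refresh: broadcast nodes keep the offsets already assigned
// for variable inputs (see AssignVarAttr2Nodes), Data nodes go through UpdateConstArgsOffset, and
// every other node takes the output offsets of its peers.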
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is a broadcast node and the preceding node is a variable, the broadcast
      // node's own InputOffset (already set in AssignVarAttr2Nodes) is used to update input_list.
      // Otherwise, the OutputOffset of the previous node is used to update input_list.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
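// Turns the watermarks [atomic_mem_start, mem_offset_end[0], mem_offset_end[1], ...] into
// (start, size) pairs and hands them to the AtomicAddrClean node that is control-linked in front of
// the given atomic node, so the memory can be cleared before the atomic op executes.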
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end, int64_t memory_type) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  // Parse the offset and size vectors: the start of block i is the end of block i - 1,
  // so size_i = start_(i+1) - start_i.
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(),
             peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) {
          GELOGE(FAILED, "Set atomic clean attr failed.");
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
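// Records the blocks an AtomicAddrClean op has to clear: the (offset, size) pairs are appended both
// to the op's workspace / workspace-bytes lists and to its ATTR_NAME_AUTOMIC_ADD_START /
// ATTR_NAME_AUTOMIC_ADD_MEM_SIZE list attributes.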
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size, int64_t memory_type) {
  auto node_op_desc = node->GetOpDesc();
  if (node_op_desc != nullptr) {
    GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str());
    vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
    vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
    workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    node_op_desc->SetWorkspace(workspace_vector);
    node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
    std::vector<int64_t> mem_start_vector;
    // If GetListInt fails, mem_start_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
    mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);
    std::vector<int64_t> mem_size_vector;
    // If GetListInt fails, mem_size_vector stays empty.
    (void) ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
    mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
    GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                     GELOGE(FAILED, "SetListInt failed.");
                     return FAILED);
    std::stringstream ss;
    for (auto iter : atomic_mem_start) {
      ss << iter << " ";
    }
    string atomic_mem_start_str = ss.str();
    ss.clear();
    ss.str("");
    for (auto iter : atomic_mem_size) {
      ss << iter << " ";
    }
    string atomic_mem_size_str = ss.str();
    GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]"
           " memtype[%ld] size[%s]", node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
           node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type,
           atomic_mem_size_str.c_str());
  }
  return SUCCESS;
}
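// Rounds the watermark of the given memory type up to the next multiple of mem_align_size.
// For example, with mem_offset_ == 513 and mem_align_size == 512:
//   (513 + 512 - 1) / 512 * 512 == 1024.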
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size, int64_t memory_type) {
  if (mem_align_size <= 0) {
    return;
  }
  auto iter = memory_offset_.find(memory_type);
  if (iter == memory_offset_.end()) {
    GELOGW("Memory offset doesn't have memory type[%ld].", memory_type);
    return;
  }
  iter->second.mem_offset_ =
      (iter->second.mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
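// Resolves a single memory type for a group of nodes whose memory is reused continuously; since all
// nodes in the dynamic batch scenario share the same memory attribute, the first node matching the
// reuse model decides for the whole group.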
ge::Status GraphMemoryAssigner::GetNodeListMemoryType(const vector<NodePtr> &nodes, int32_t mem_reuse_model,
                                                      int64_t &memory_type) {
  memory_type = RT_MEMORY_HBM;
  // In the dynamic batch scenario, the memory attributes of all nodes are the same.
  for (auto &n : nodes) {
    if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed.");
      break;
    }
    if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
      GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed.");
      break;
    }
  }
  return SUCCESS;
}
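// Reads the node's input or output memory type list. An absent or empty list falls back to
// RT_MEMORY_HBM; a non-empty list must have one entry per in data anchor and be homogeneous, in
// which case its first entry is adopted.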
ge::Status GraphMemoryAssigner::GetNodeMemoryType(const NodePtr &node, int64_t &memory_type, string input_or_output) {
  memory_type = RT_MEMORY_HBM;
  vector<int64_t> mem_type_list;
  if (input_or_output == "input") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_INPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (input_or_output == "output") {
    (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATTR_NAME_OUTPUT_MEM_TYPE_LIST, mem_type_list);
  }
  if (mem_type_list.empty()) {
    if (memory_offset_.find(memory_type) == memory_offset_.end()) {
      std::string error = "Memory offset map does not have memory type" + FmtToStr(memory_type) +
                          ", opname is " + FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
      GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
      return FAILED;
    }
    return SUCCESS;
  }
  if (mem_type_list.size() != node->GetAllInDataAnchorsSize()) {
    std::string error = "The size" + FmtToStr(mem_type_list.size()) +
                        " of mem type list is not equal to the size of in data anchor" +
                        FmtToStr(node->GetAllInDataAnchorsSize()) + ", opname is " +
                        FmtToStr(node->GetName()) + ", optype is " + FmtToStr(node->GetType());
    GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
    return FAILED;
  }
  if (!CheckContinuousMemType(mem_type_list)) {
    GELOGE(FAILED, "Check continuous memory type failed.");
    return FAILED;
  }
  // The memory is continuous and the memory types are all the same, so use the first one.
  memory_type = mem_type_list[0];
  return SUCCESS;
}
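// Continuous memory must be homogeneous: every entry of mem_type_list has to equal the first entry,
// and that memory type must already be present in memory_offset_.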
bool GraphMemoryAssigner::CheckContinuousMemType(vector<int64_t> mem_type_list) {
  if (mem_type_list.empty()) {
    return true;
  }
  int64_t mem_type_tmp = mem_type_list[0];
  for (auto mem_type : mem_type_list) {
    if (mem_type != mem_type_tmp) {
      std::string error = "The memory is continuous, but the types of the input memory are inconsistent. They are " +
                          FmtToStr(mem_type_tmp) + " and " + FmtToStr(mem_type);
      ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
      GELOGW("The memory is continuous, but the types of the input memory are inconsistent. They are [%ld] and [%ld].",
             mem_type_tmp, mem_type);
      return false;
    }
  }
  if (memory_offset_.find(mem_type_tmp) == memory_offset_.end()) {
    std::string error = "Memory offset map does not have memory type" + FmtToStr(mem_type_tmp);
    ErrorManager::GetInstance().ATCReportErrMessage("E10043", {"reason"}, {error});
    GELOGW("Memory offset map does not have memory type[%ld].", mem_type_tmp);
    return false;
  }
  return true;
}
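// Debug helper that dumps the current offset watermark of every memory type.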
void GraphMemoryAssigner::PrintMemoryOffset() {
  for (auto pair : memory_offset_) {
    // Assign memory of max batch nodes that have the same batch label.
    GELOGD("Reassign memory for max batch virtual nodes, memory type = %ld, memory offset = %zu.",
           pair.first, pair.second.mem_offset_);
  }
}
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware, acting as a bridge: GE takes the graph issued by ME as input, performs a series of deep graph-optimization operations, and finally outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training/inference, GE is invoked automatically and is transparent to the user. GE mainly consists of two parts, GE API and GE Core; the detailed architecture diagram is shown below.