
graph_mem_assigner.cc

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "graph/build/memory/graph_mem_assigner.h"

#include <cstring>
#include <set>

#include "common/math/math_util.h"
#include "framework/common/debug/ge_log.h"
#include "graph/build/memory/hybrid_mem_assigner.h"
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/build/memory/block_mem_assigner.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_attr_value.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"

namespace {
const int kDataOutputIndex = 0;
const int kAllInputAddrIsAtomic = -1;
const int kVirtualInputNodeMemoryReuse = 0;
const int kVirtualOutputNodeMemoryReuse = 1;
const size_t kVirtualInputNodeOutputSize = 1;
const size_t kVirtualOutputNodeInputSize = 1;
const size_t kVirtualNodeDataIndex = 0;
const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_";
}  // namespace

namespace ge {
Status VariableMemoryAssigner::Assign() {
  Status result = ge::VarMemAssignUtil::AssignConstantOpMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }

  result = ge::VarMemAssignUtil::AssignVarMemory(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status VariableMemoryAssigner::AssignVarAttr2Nodes() {
  Status result = ge::VarMemAssignUtil::AssignVarAttr2Nodes(compute_graph_);
  if (result != ge::SUCCESS) {
    return result;
  }
  return ge::SUCCESS;
}

Status GraphMemoryAssigner::AssignMemory() {
  ge::HybridMemAssignerPtr mem_assigner(new (std::nothrow) HybridMemAssigner(compute_graph_));
  if (mem_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc HybridMemAssigner failed.");
    return ge::FAILED;
  }
  if (mem_assigner->Assign() != ge::SUCCESS) {
    GELOGE(ge::FAILED, "Memory assigner failed");
    return ge::FAILED;
  }
  MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
  memory_offset_.push_back(memory_offset);

  auto session_id = compute_graph_->GetSessionID();
  int64_t var_size_before_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM);
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->Assign() != ge::SUCCESS) {
    return ge::FAILED;
  }
  int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
  GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);

  mem_assigner_ = std::move(mem_assigner);
  return ge::SUCCESS;
}

ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() {
  auto variable_assigner =
      std::unique_ptr<ge::VariableMemoryAssigner>(new (std::nothrow) ge::VariableMemoryAssigner(compute_graph_));
  if (variable_assigner == nullptr) {
    GELOGE(ge::FAILED, "Alloc VariableMemoryAssigner failed.");
    return ge::FAILED;
  }
  if (variable_assigner->AssignVarAttr2Nodes() != ge::SUCCESS) {
    return ge::FAILED;
  }
  return ge::SUCCESS;
}

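// Computes two sizes for one output tensor: output_mem_size is the size of a single batch slice
// (all dims before dim_index collapsed to 1), while out_size is the size of the complete tensor as
// recorded in the tensor descriptor. batch_dim_num accumulates the product of the collapsed dims.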
ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc,
                                                                  int64_t dim_index, int64_t &output_mem_size,
                                                                  int64_t &batch_dim_num, int64_t &out_size) {
  graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(FAILED, "Opdesc GetSize failed!");
    return FAILED;
  }

  GeShape output_shape = output_desc->GetShape();
  std::vector<int64_t> output_dims = output_shape.GetDims();
  if (dim_index >= static_cast<int64_t>(output_dims.size())) {
    GELOGE(FAILED, "Invalid value(%ld) of attr _reuse_input_on_dim_index, which is out of data range [0, %zu).",
           dim_index, output_dims.size());
    return FAILED;
  }

  for (int64_t index = 0; index < dim_index; index++) {
    FMK_INT64_MULCHECK(batch_dim_num, output_dims[index]);
    batch_dim_num *= output_dims[index];
    output_dims[index] = 1;
  }

  output_shape = GeShape(output_dims);
  Format out_format = output_desc->GetFormat();
  DataType data_type = output_desc->GetDataType();

  graph_status = ge::TensorUtils::CalcTensorMemSize(output_shape, out_format, data_type, output_mem_size);
  if (graph_status != GRAPH_SUCCESS) {
    GELOGE(graph_status, "Opdesc CalcTensorMemSize failed!");
    return FAILED;
  }

  if (output_mem_size < 0) {
    GELOGE(FAILED, "After calculating tensor memory size, output_mem_size = %ld, out of data range [0, %ld]",
           output_mem_size, INT64_MAX);
    return FAILED;
  }

  return SUCCESS;
}

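// Finds the batch label of the largest-shape node among virtual nodes grouped by name prefix.
// Shapes of different batches must have the same rank and may differ in at most one dimension;
// any other mismatch is rejected. Only the first group is inspected, since the max batch label
// is the same across groups.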
Status GraphMemoryAssigner::GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map,
                                             int32_t mem_reuse_model, string &max_batch_label) {
  for (auto &i_map : mem_reuse_virtual_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    vector<int64_t> max_shape_dims;
    size_t max_batch_dim = 0;
    bool max_batch_dim_find = false;
    for (size_t i = 0; i < virtual_nodes_list.size(); ++i) {
      GE_CHECK_NOTNULL(virtual_nodes_list[i]);
      OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);

      ge::ConstGeTensorDescPtr input_output_desc;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      GE_CHECK_NOTNULL(input_output_desc);

      if (i == 0) {
        // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
        (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
        max_shape_dims = input_output_desc->GetShape().GetDims();
      } else {
        vector<int64_t> current_shape_dims = input_output_desc->GetShape().GetDims();
        if (current_shape_dims.size() != max_shape_dims.size()) {
          GELOGE(FAILED, "The shape size of several nodes between multiple batches does not match.");
          return FAILED;
        }
        for (size_t j = 0; j < current_shape_dims.size(); ++j) {
          if (current_shape_dims[j] == max_shape_dims[j]) {
            continue;
          }
          if (max_batch_dim_find && max_batch_dim != j) {
            GELOGE(FAILED, "The shape of several nodes between multiple batches does not match.");
            return FAILED;
          }
          max_batch_dim_find = true;
          max_batch_dim = j;
          if (current_shape_dims[j] > max_shape_dims[j]) {
            max_shape_dims[j] = current_shape_dims[j];
            // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
            (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label);
          }
          // Only compare the first different dim in shape.
          break;
        }
      }
    }
    // In every element of virtual_input_nodes_map, the label of the max batch node is the same.
    break;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, size_t &mem_offset) {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return ge::FAILED;
  }

  GE_CHK_STATUS_RET(ReAssignContinuousMemory(is_loop_graph), "ReAssignContinuousMemory Failed!");

  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousInputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousInputMemory Failed!");

  GE_CHK_STATUS_RET(ReAssignReuseAndNoPaddingContinuousOutputMemory(),
                    "ReAssignReuseAndNoPaddingContinuousOutputMemory Failed!");

  GE_CHK_STATUS_RET(ReAssignAtomicMemory(is_loop_graph), "ReAssignAtomicMemory Failed!");

  mem_offset = memory_offset_[0].mem_offset_;

  auto session_id = compute_graph_->GetSessionID();
  if (mem_offset > VarManager::Instance(session_id)->GetGraphMemoryMaxSize()) {
    GELOGE(ge::FAILED, "Current memoffset %zu is greater than memory manager malloc max size %zu", mem_offset,
           VarManager::Instance(session_id)->GetGraphMemoryMaxSize());
    return ge::FAILED;
  }
  return SUCCESS;
}

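// Zero-copy blocks are appended at the current mem_offset: every block flagged is_zero_copy_ is
// resized and given head/tail offsets, the offsets are propagated to the owning ops via
// SetOpMemOffset(true), and the total zero-copy size is reported back to the caller.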
Status GraphMemoryAssigner::AssignZeroCopyMemory(size_t &mem_offset, size_t &zero_mem_copy_size) {
  BlockMemAssignerPtr priority_assigner = std::move(mem_assigner_->GetPriorityAssinger());
  GE_IF_BOOL_EXEC(priority_assigner == nullptr, GELOGE(FAILED, "Get priority_assigner failed."); return ge::FAILED;);

  size_t mem_offset_tmp = mem_offset;

  // Set offset for zero copy blocks.
  for (auto &memory_block : priority_assigner->GetMemoryBlocks()) {
    if (memory_block == nullptr || memory_block->deleted_block_ || !memory_block->is_zero_copy_) {
      continue;
    }
    memory_block->Resize();
    memory_block->SetHeadOffset(mem_offset);
    mem_offset += memory_block->Size();
    memory_block->SetTailOffset(mem_offset - 1);
  }
  GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset);

  // Set offset for zero copy nodes.
  priority_assigner->SetOpMemOffset(true);
  zero_mem_copy_size = mem_offset - mem_offset_tmp;
  memory_offset_[0].mem_offset_ = mem_offset;

  GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset, mem_offset_tmp, zero_mem_copy_size);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
  GELOGI("Begin to reassign continuous memory");
  Status ret;
  for (auto &node : compute_graph_->GetAllNodes()) {
    // Get the continuous input type of the node, default is false.
    bool is_input_continuous = false;
    GE_CHECK_NOTNULL(node->GetOpDesc());
    // If GetBool fails, is_input_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous);

    // Assign continuous input memory.
    if (is_input_continuous) {
      int64_t mem_clean_start = 0;
      int64_t mem_clean_size = 0;
      ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign continuous input memory failed!");
        return ret;
      }

      // Clean up atomic address, e.g. for hcom nodes.
      vector<int32_t> input_indexes;
      // If GetListInt fails, input_indexes is empty.
      (void)ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes);

      if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) {
        // Check whether there is an atomic conflict between the current node and the peer out node.
        if (!CheckInputIsSupportAtomic(node)) {
          GELOGE(ge::FAILED,
                 "There is an atomic conflict between the current node and the peer out node, not supported!");
          return ge::FAILED;
        } else if (is_loop_graph) {
          GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, mem_clean_start));
        } else {
          GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {mem_clean_start}, {mem_clean_size}),
                            "SetAtomicCleanAttr failed.");
        }
      }
    }

    // Get the reference type of the node, default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);

    // Get the continuous output type of the node, default is false.
    bool is_output_continuous = false;
    // If GetBool fails, is_output_continuous is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous);

    // If the output is ref type and refers to the ref of an input, the names of the output
    // and the input are the same. When GE encounters a ref type, it finds the matching relationship
    // according to the names of input and output and allocates the same memory address, e.g. HCOMBroadcast.
    if (!is_ref && is_output_continuous) {  // Assign continuous output memory.
      ret = AssignContinuousOutputMemory(node);
      if (ret != ge::SUCCESS) {
        GELOGE(ret, "Assign reference memory failed!");
        return ret;
      }
    }
  }

  GELOGI("After reassign continuous memory, memoffset = %zu.", memory_offset_[0].mem_offset_);
  return ge::SUCCESS;
}

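// Packs the outputs of all peer nodes feeding this node into one contiguous region. Each peer
// output offset is set to the running mem_offset_, which advances by the tensor size and is then
// aligned to MEM_ALIGN_SIZE unless ATTR_NAME_GET_TENSOR_ACTUAL_SIZE is set. When
// ATTR_NAME_CONTINUOUS_INPUT_ALLOC is set, existing offsets are kept and only the start address
// and total size of the region are derived.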
Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start,
                                                        int64_t &continuous_mem_size) {
  GELOGI("Current node %s needs continuous input.", node->GetName().c_str());
  continuous_mem_start = memory_offset_[0].mem_offset_;
  bool continuous_input_alloc = false;
  (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc);
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue);
    bool is_peer_output_continuous = false;
    // If GetBool fails, is_peer_output_continuous is false.
    (void)ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous);

    // Get peer node output size. If size == 1 (the peer node has only one output), the continuous input
    // of this node and the continuous output of the previous node are the same, which is supported.
    // If size != 1, there may be a conflict between the two, which is not supported.
    auto peer_output_size = peer_op_desc->GetOutputsSize();
    GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1),
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "continuous output. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);

    bool is_peer_reference = false;
    // If GetBool fails, is_peer_reference is false.
    (void)AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference);
    GE_IF_BOOL_EXEC(is_peer_reference,
                    GELOGE(PARAM_INVALID,
                           "Current node %s requires continuous input, while the previous node %s requires "
                           "reference. There may be conflict between the two. This node is not supported now.",
                           node->GetOpDesc()->GetName().c_str(), peer_op_desc->GetName().c_str());
                    return PARAM_INVALID;);

    vector<int64_t> output_list = peer_op_desc->GetOutputOffset();
    std::vector<int64_t> offsets_for_fusion = {};
    bool has_offset_attr =
        AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion);
    if (peer_out_data_anchor->GetIdx() < static_cast<int>(output_list.size())) {
      if (continuous_input_alloc && !has_offset_attr) {
        if (in_data_anchor->GetIdx() == 0) {
          continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx());
        }
        // Cannot use "else if" here, in case the node has only one input.
        if (in_data_anchor->GetIdx() == static_cast<int>(node->GetAllInDataAnchors().size()) - 1) {
          int64_t tensor_desc_size = 0;
          Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())),
                                                tensor_desc_size);
          GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);

          tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
          continuous_mem_size =
              output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
        }
        GELOGI(
            "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
            "real_size[%u].",
            node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
            peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
            0, 0);
        continue;
      }
      output_list.at(peer_out_data_anchor->GetIdx()) = memory_offset_[0].mem_offset_;
    } else {
      GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return FAILED;
    }
    peer_op_desc->SetOutputOffset(output_list);
    size_t pre_mem_offset = memory_offset_[0].mem_offset_;

    int64_t tensor_desc_size = 0;
    if (has_offset_attr) {
      if (peer_out_data_anchor->GetIdx() < static_cast<int>(offsets_for_fusion.size())) {
        auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()];
        memory_offset_[0].mem_offset_ += offset_for_fusion;
      } else {
        GELOGE(FAILED, "fusion: peer node %s index : %d is out of range.", peer_op_desc->GetName().c_str(),
               peer_out_data_anchor->GetIdx());
        return FAILED;
      }
    } else {
      Status ret =
          TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size);
      GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;);

      memory_offset_[0].mem_offset_ += tensor_desc_size;
    }

    // If tensor_actual_size is set, memory alignment is not required.
    int32_t is_tensor_actual_size = 0;
    ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size);
    if (is_tensor_actual_size == 0) {
      AlignMemOffset(MEM_ALIGN_SIZE);
    }
    GELOGI(
        "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(),
        pre_mem_offset, peer_op_desc->GetStreamId(), (memory_offset_[0].mem_offset_ - pre_mem_offset), tensor_desc_size);
  }

  memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
  if (!continuous_input_alloc) {
    continuous_mem_size = memory_offset_[0].mem_offset_ - continuous_mem_start;
  }
  return SUCCESS;
}

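// Lays out all outputs of one node back to back, starting from the offset already assigned to
// output 0; each subsequent output starts at the previous end rounded up to MEM_ALIGN_SIZE.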
Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) {
  GELOGI("Current node %s needs continuous output.", node->GetName().c_str());
  auto out_op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
  vector<int64_t> output_list = out_op_desc->GetOutputOffset();
  if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) {
    GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
           out_op_desc->GetOutputsSize(), output_list.size());
    return ge::FAILED;
  }

  size_t mem_offset = output_list[0];
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset;
    int64_t tensor_desc_size = 0;
    if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) !=
        ge::SUCCESS) {
      GELOGE(FAILED, "GetSize failed.");
      return FAILED;
    }
    mem_offset += tensor_desc_size;
    if (mem_offset <= 0) {
      return FAILED;
    }
    mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE;
    GELOGI(
        "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
        output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size);
  }
  out_op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}

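// A "virtual input" node concatenates its inputs without padding: its single output starts at the
// reuse offset, and every peer output is placed consecutively from that same offset using its real
// (per-batch-slice) size, so the output tensor overlaps its inputs. The slack between each input's
// full out size and its real size is accumulated and reserved once at the end.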
Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  output_list.at(0) = mem_offset_reuse;
  op_desc->SetOutputOffset(output_list);
  GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_CHECK_NOTNULL(peer_out_data_anchor);
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    GE_CHECK_NOTNULL(peer_op_desc);
    vector<int64_t> output_offsets = peer_op_desc->GetOutputOffset();
    if (peer_out_data_anchor->GetIdx() >= static_cast<int>(output_offsets.size())) {
      GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
      return ge::FAILED;
    }
    output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
    peer_op_desc->SetOutputOffset(output_offsets);
    size_t pre_mem_offset = mem_offset_reuse;

    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].",
             peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;

    GELOGI(
        "[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] "
        "real_size[%ld].",
        node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx(),
        pre_mem_offset, peer_op_desc->GetStreamId(), out_size, output_mem_size);
  }

  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_input_nodes_map;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);

    if (attr_reuse && attr_continuous) {
      if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) {
        // When the current virtual node has several outputs, we cannot directly determine which input is
        // the tensor to reuse.
        GELOGE(FAILED, "Only one output is supported, current virtual node %s has %zu outputs.", n->GetName().c_str(),
               op_desc->GetOutputsSize());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual input node, memory offset = %zu.", memory_offset_[0].mem_offset_);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, so there is no need to check the return value,
      // only the out parameter.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = memory_offset_[0].mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }

        memory_offset_[0].mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual input node, align memory = %zu.", memory_offset_[0].mem_offset_);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: a dynamic multi-batch node, which needs to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_input_nodes;
        if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name];
        }
        parallel_virtual_input_nodes.emplace_back(n);
        mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes;
      }
    }
  }
  int32_t mem_reuse_model = kVirtualInputNodeMemoryReuse;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual input nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}

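// Mirror of ReAssignVirtualInputNodeMemory for "virtual output" nodes (no-padding continuous
// output reusing the single input): the reused input tensor and all output pieces share the same
// base offset.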
Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) {
  OpDescPtr op_desc = node->GetOpDesc();

  // 1. Set memory of the input tensor to be reused.
  auto in_data_anchor_list = node->GetAllInDataAnchors();
  auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor();
  GE_CHECK_NOTNULL(peer_out_data_anchor);
  auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
  GE_CHECK_NOTNULL(peer_op_desc);
  vector<int64_t> in_node_output_offsets = peer_op_desc->GetOutputOffset();
  if (peer_out_data_anchor->GetIdx() >= static_cast<int>(in_node_output_offsets.size())) {
    GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx());
    return FAILED;
  }
  in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse;
  peer_op_desc->SetOutputOffset(in_node_output_offsets);
  GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse);

  // 2. Set memory of output tensors.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (output_list.empty()) {
    GELOGE(FAILED, "Output offset is empty, node name: %s", node->GetName().c_str());
    return FAILED;
  }
  if (op_desc->GetOutputsSize() > output_list.size()) {
    GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(),
           output_list.size());
    return FAILED;
  }
  int64_t attr_dim_index;
  bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index);
  if (!get_attr_dim_flag) {
    GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed.");
    return FAILED;
  }

  size_t extra_memory_size = 0;
  for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
    output_list[out_data_anchor->GetIdx()] = mem_offset_reuse;
    size_t pre_mem_offset = mem_offset_reuse;

    // Calculate the tensor real size of each piece of data and the out size of the complete data.
    ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx());
    GE_CHECK_NOTNULL(output_desc);
    int64_t output_mem_size;
    int64_t batch_dim_num = 1;
    int64_t out_size;
    if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) !=
        SUCCESS) {
      GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", op_desc->GetName().c_str(),
             out_data_anchor->GetIdx());
      return FAILED;
    }

    mem_offset_reuse += output_mem_size;
    extra_memory_size = extra_memory_size + out_size - output_mem_size;

    GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].",
           node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(),
           pre_mem_offset, out_size, output_mem_size);
  }
  op_desc->SetOutputOffset(output_list);
  mem_offset_reuse += extra_memory_size;
  size_t after_mem_offset = mem_offset_reuse;
  GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.",
         op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset);
  return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() {
  map<string, vector<NodePtr>> mem_reuse_virtual_output_nodes_map;
  for (const auto &n : compute_graph_->GetAllNodes()) {
    OpDescPtr op_desc = n->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    bool attr_continuous = false;
    bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous);
    GE_IF_BOOL_EXEC(!get_continuous_flag, continue);
    bool attr_reuse = false;
    bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse);
    GE_IF_BOOL_EXEC(!get_reuse_flag, continue);

    if (attr_reuse && attr_continuous) {
      auto in_data_anchor_list = n->GetAllInDataAnchors();
      if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) {
        // When the current virtual node has several inputs, we cannot directly determine which input is
        // the tensor to reuse.
        GELOGE(FAILED, "Only one input is supported, current virtual node %s has %zu inputs.", n->GetName().c_str(),
               in_data_anchor_list.size());
        return FAILED;
      }
      GELOGD("Start to reassign memory for virtual output node, memory offset = %zu.", memory_offset_[0].mem_offset_);
      string batch_label_string;
      // Not all ops have ATTR_NAME_BATCH_LABEL, so there is no need to check the return value,
      // only the out parameter.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string.empty()) {
        size_t node_mem_offset = memory_offset_[0].mem_offset_;
        // No ATTR_NAME_BATCH_LABEL, no need to reuse memory.
        Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset);
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str());
          return FAILED;
        }
        memory_offset_[0].mem_offset_ = node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual output node, align memory = %zu.", memory_offset_[0].mem_offset_);
      } else {
        // Has ATTR_NAME_BATCH_LABEL: a dynamic multi-batch node, which needs to reuse memory.
        string current_node_full_name = op_desc->GetName();
        size_t pos = current_node_full_name.find(kMbatchNodeNameFlag);
        if (pos == string::npos) {
          GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual output node, node name: %s.",
                 kMbatchNodeNameFlag, n->GetName().c_str());
          return FAILED;
        }
        string fixed_name = current_node_full_name.substr(0, pos);
        vector<NodePtr> parallel_virtual_output_nodes;
        if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) {
          parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name];
        }
        parallel_virtual_output_nodes.emplace_back(n);
        mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes;
      }
    }
  }
  int32_t mem_reuse_model = kVirtualOutputNodeMemoryReuse;
  if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) {
    GELOGE(FAILED, "Reassign memory of virtual output nodes failed.");
    return FAILED;
  }
  return SUCCESS;
}

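// Two-pass assignment for multi-batch virtual nodes that share one name prefix: first allocate
// real memory for the max-batch node of each group and remember each group's start offset, then
// map every remaining batch node of the same group onto that offset, so all batches of one
// logical node reuse the same region.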
Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map,
                                                       int32_t mem_reuse_model) {
  // Find the max batch label value.
  string max_batch_label;
  if (GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label) != SUCCESS) {
    GELOGE(FAILED, "Get max batch label failed.");
    return FAILED;
  }
  GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str());

  // Assign memory of max batch nodes that have the same batch label.
  GELOGD("Start to reassign memory for max batch virtual nodes, memory offset = %zu.", memory_offset_[0].mem_offset_);
  vector<size_t> nodes_mem_offset_list;
  for (auto &i_map : mem_reuse_nodes_map) {
    size_t max_batch_node_mem_offset = memory_offset_[0].mem_offset_;
    nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset);

    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      // op_desc is not nullptr; it has been checked before.
      OpDescPtr op_desc = i_node->GetOpDesc();
      string batch_label_string;
      // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value.
      (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string);
      if (batch_label_string == max_batch_label) {
        Status status = SUCCESS;
        if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
          status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset);
        } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
          status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset);
        } else {
          GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
          return FAILED;
        }
        if (status != SUCCESS) {
          GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
          return FAILED;
        }

        memory_offset_[0].mem_offset_ = max_batch_node_mem_offset;
        AlignMemOffset(MEM_ALIGN_SIZE);
        GELOGD("After reassign memory for virtual node, align memory = %zu.", memory_offset_[0].mem_offset_);
        // Only assign memory of max batch nodes.
        break;
      }
    }
  }

  // Assign memory of remaining nodes that have the same fixed_name.
  GELOGD("Start to reassign memory for remaining batch virtual nodes, memory offset = %zu.",
         memory_offset_[0].mem_offset_);
  size_t memory_reuse_index = 0;
  for (auto &i_map : mem_reuse_nodes_map) {
    vector<NodePtr> virtual_nodes_list = i_map.second;
    for (auto &i_node : virtual_nodes_list) {
      size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index];
      Status status = SUCCESS;
      if (mem_reuse_model == kVirtualInputNodeMemoryReuse) {
        status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) {
        status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset);
      } else {
        GELOGE(FAILED, "Invalid parameter memory reuse model, which is: %d.", mem_reuse_model);
        return FAILED;
      }
      if (status != SUCCESS) {
        GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str());
        return FAILED;
      }
    }
    memory_reuse_index++;
  }
  return SUCCESS;
}

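// Atomic ops need their output/workspace memory cleared before execution. Ordinary atomic nodes
// share one atomic clean op covering [atomic_mem_start, mem_offset_); loop graphs and nodes that
// connect directly to the network output get their clean attributes set per node instead.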
Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) {
  GE_CHECK_NOTNULL(compute_graph_);
  // Atomic op memory start addr.
  int64_t atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
  GELOGI("Begin to reAssign atomic memory, atomic initial address mem_offset = %zu!", memory_offset_[0].mem_offset_);

  vector<NodePtr> connect_netoutput_nodes;
  for (auto &node : compute_graph_->GetAllNodes()) {
    auto node_op_desc = node->GetOpDesc();
    if (node_op_desc == nullptr) {
      continue;
    }

    bool is_atomic = false;
    // If GetBool fails, is_atomic is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_ATOMIC_NODE, is_atomic);
    if (!is_atomic) {
      continue;
    }

    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_REFERENCE, is_ref);
    if (is_ref) {
      GELOGE(ge::PARAM_INVALID, "The node %s cannot have both atomic and ref attribute.",
             node_op_desc->GetName().c_str());
      return ge::PARAM_INVALID;
    }

    vector<int> is_connect_netoutput;
    // If GetListInt fails, is_connect_netoutput is an empty vector.
    (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_NODE_CONNECT_OUTPUT, is_connect_netoutput);
    if (!is_connect_netoutput.empty()) {
      connect_netoutput_nodes.emplace_back(node);
      continue;
    }

    // Atomic op memory start addr of loop graph.
    int64_t loop_graph_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }

    /// In networks with loop op, atomic op uses atomic_addr_clean op independently,
    /// so we need to set the attr separately.
    if (is_loop_graph) {
      GE_CHK_STATUS_RET(SetLoopGraphAtomicAttr(node, loop_graph_atomic_mem_start));
    }
  }

  // In networks without loop op, the same atomic addr clean op is used for all atomic ops.
  if (!is_loop_graph) {
    // Set the address attr of the atomic clean operator.
    int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
    if (atomic_mem_size != 0) {
      GE_CHK_STATUS_RET(SetAtomicCleanAttr(nullptr, {atomic_mem_start}, {atomic_mem_size}),
                        "SetAtomicCleanAttr failed.");
    }
  }

  if (AssignConnectNetOutputAtomicMemory(connect_netoutput_nodes) != SUCCESS) {
    GELOGE(FAILED, "Failed to assign memory of nodes that connect to netoutput.");
    return FAILED;
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignAtomicOutputAndWorkspaceMemory(const ge::NodePtr &node,
                                                                 vector<int64_t> &mem_offset_end) {
  auto node_op_desc = node->GetOpDesc();
  // Assign atomic node output memory.
  Status ret = AssignAtomicOutputMemory(node, mem_offset_end);
  if (ret != SUCCESS) {
    GELOGE(ret, "Failed to assign atomic output memory, node is %s.", node_op_desc->GetName().c_str());
    return ret;
  }

  // Check and assign atomic node workspace memory.
  map<string, map<int64_t, int64_t>> atomic_workspace_info;
  atomic_workspace_info = node_op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_info);
  if (!atomic_workspace_info.empty()) {
    bool is_fusion_node = false;
    // If GetBool fails, is_fusion_node is false.
    (void)ge::AttrUtils::GetBool(node_op_desc, ATOMIC_ATTR_IS_FUSION_NODE, is_fusion_node);

    if (is_fusion_node) {
      // Assign fusion atomic node workspace memory.
      ret = AssignFusionAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    } else {
      // Assign single ordinary atomic node workspace memory, fusion nodes not included.
      ret = AssignOrdinaryAtomicWorkspaceMemory(node_op_desc, atomic_workspace_info, mem_offset_end);
    }
    if (ret != SUCCESS) {
      GELOGE(ret, "Assign atomic workspace memory failed, node is %s.", node_op_desc->GetName().c_str());
      return ret;
    }
  }
  return SUCCESS;
}

Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes) {
  for (auto &node : connect_netoutput_nodes) {
    GE_CHECK_NOTNULL(node);
    if (node->GetOpDesc() == nullptr) {
      GELOGW("Current node %s op desc is nullptr, memory assignment is skipped.", node->GetName().c_str());
      continue;
    }

    // Atomic memory start addr.
    int64_t original_atomic_mem_start = static_cast<int64_t>(memory_offset_[0].mem_offset_);
    GELOGD("Start to assign memory of atomic node, node name: %s, node type: %s, mem_offset: %ld.",
           node->GetName().c_str(), node->GetOpDesc()->GetType().c_str(), original_atomic_mem_start);
    vector<int64_t> mem_offset_end;
    if (AssignAtomicOutputAndWorkspaceMemory(node, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Assign atomic output and workspace memory failed, node is %s.", node->GetName().c_str());
      return FAILED;
    }

    // All atomic nodes here use the atomic_addr_clean op independently, so the attr is set separately.
    if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) {
      GELOGE(FAILED, "Failed to set atomic attr separately.");
      return FAILED;
    }
  }
  return SUCCESS;
}

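// For ref-type nodes, an output that shares its name with an input reuses that input's memory:
// the output offset is copied from the peer output feeding the matching input, so no new memory
// is allocated.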
Status GraphMemoryAssigner::AssignReferenceMemory() {
  for (auto &node : compute_graph_->GetDirectNode()) {
    // Get the reference type of the node, default is false.
    bool is_ref = false;
    // If GetBool fails, is_ref is false.
    (void)ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref);
    if (!is_ref) {
      continue;
    }

    GELOGI("Current node %s needs to support the reference relationship between output and input.",
           node->GetName().c_str());

    auto out_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED);
    vector<int64_t> output_list = out_op_desc->GetOutputOffset();

    if (out_op_desc->GetOutputsSize() > output_list.size()) {
      GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.",
             out_op_desc->GetOutputsSize(), output_list.size());
      return ge::FAILED;
    }

    map<string, int> input_name_index;
    for (const auto &input_name : out_op_desc->GetAllInputNames()) {
      int index = out_op_desc->GetInputIndexByName(input_name);
      input_name_index.emplace(input_name, index);
    }

    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      string out_data_anchor_name = out_op_desc->GetOutputNameByIndex(out_data_anchor->GetIdx());
      auto iter = input_name_index.find(out_data_anchor_name);
      if (iter != input_name_index.end()) {
        int index = iter->second;
        GELOGI("Reference memory: input anchor index = %d, input anchor name = %s, output anchor name = %s.", index,
               iter->first.c_str(), out_data_anchor_name.c_str());
        GE_CHECK_NOTNULL(node->GetInDataAnchor(index));
        auto peer_out_anchor = node->GetInDataAnchor(index)->GetPeerOutAnchor();
        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
        int peer_out_anchor_index = peer_out_anchor->GetIdx();
        auto peer_out_node = peer_out_anchor->GetOwnerNode();
        auto peer_out_op_desc = peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(peer_out_op_desc);
        output_list[out_data_anchor->GetIdx()] = peer_out_op_desc->GetOutputOffset()[peer_out_anchor_index];
        GELOGI("Reference output : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), peer_out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], peer_out_op_desc->GetStreamId());
      } else {
        GELOGI("Reference output : origin %s name[%s] output[%d] offset is [%ld] stream_id[%ld]",
               node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(),
               out_data_anchor->GetIdx(), output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId());
      }
    }

    out_op_desc->SetOutputOffset(output_list);
  }
  return ge::SUCCESS;
}

bool GraphMemoryAssigner::CheckInputIsSupportAtomic(const ge::NodePtr &node) {
  for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
    auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
    if (peer_out_data_anchor == nullptr) {
      continue;
    }
    auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc();
    if (peer_op_desc == nullptr) {
      continue;
    }
    if ((peer_op_desc->GetType() == CONSTANTOP) || (peer_op_desc->GetType() == AIPP_DATA_TYPE) ||
        (peer_op_desc->GetType() == VARIABLE)) {
      GELOGE(ge::FAILED,
             "The current node is %s, and the peer out node is %s. Currently, this scenario is not supported",
             node->GetName().c_str(), peer_op_desc->GetName().c_str());
      return false;
    }
  }
  return true;
}

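// Assigns memory for the outputs listed in ATOMIC_ATTR_OUTPUT_INDEX. Outputs whose atomic address
// was already assigned while handling a downstream continuous-input node (see
// ReAssignContinuousMemory) are skipped; mem_offset_end records the aligned end offset after each
// newly placed output.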
Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, vector<int64_t> &mem_offset_end) {
  auto op_desc = node->GetOpDesc();
  GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(ge::FAILED, "op_desc is null."); return ge::FAILED);
  mem_offset_end.clear();
  GELOGD("Begin to assign atomic output memory, node = %s.", op_desc->GetName().c_str());

  vector<int64_t> atomic_output_index;
  // If GetListInt fails, atomic_output_index is empty.
  (void)ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);

  // Check atomic output.
  vector<int64_t> output_list = op_desc->GetOutputOffset();
  if (atomic_output_index.size() > output_list.size()) {
    GELOGE(ge::FAILED, "The size of atomic_output_index is more than the size of output_list");
    return ge::FAILED;
  }
  auto output_list_size = static_cast<int64_t>(output_list.size());
  for (auto &output_index : atomic_output_index) {
    if (output_index >= output_list_size) {
      GELOGE(ge::PARAM_INVALID, "The output index %ld is more than the size %ld of output_list.", output_index,
             output_list_size);
      return ge::PARAM_INVALID;
    }

    // If the input of the cascade op needs to clear the atomic addr, there is no need to clear it separately here.
    bool is_assigned_mem = false;
    if (GetMemoryAssignmentStatus(node, output_index, is_assigned_mem) != SUCCESS) {
      GELOGE(ge::FAILED, "Failed to get memory assignment of node %s.", node->GetName().c_str());
      return ge::FAILED;
    }

    // If an atomic address has already been assigned, skip it; it does not need to be reassigned.
    if (is_assigned_mem) {
      GELOGI(
          "Node %s atomic output : we have assigned atomic memory as the input of the next node in "
          "ReAssignContinuousMemory function.",
          op_desc->GetName().c_str());
      continue;
    }

    auto output_desc = op_desc->GetAllOutputsDescPtr().at(output_index);
    int64_t size = 0;
    if (ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS) {
      GELOGI("Get size failed");
    }

    output_list[output_index] = memory_offset_[0].mem_offset_;

    GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld].",
           compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, memory_offset_[0].mem_offset_,
           op_desc->GetStreamId(), size, size);

    memory_offset_[0].mem_offset_ += size;
    AlignMemOffset(MEM_ALIGN_SIZE);
    mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
  }

  op_desc->SetOutputOffset(output_list);
  return ge::SUCCESS;
}

Status GraphMemoryAssigner::GetMemoryAssignmentStatus(const ge::NodePtr &node, int64_t output_index,
                                                      bool &is_mem_assigned) {
  if (static_cast<size_t>(output_index) >= node->GetAllOutDataAnchors().size()) {
    GELOGE(ge::PARAM_INVALID, "Output index %ld exceeds the size of the node's AllOutDataAnchors.", output_index);
    return ge::PARAM_INVALID;
  }
  auto out_data_anchor = node->GetAllOutDataAnchors().at(output_index);
  GE_CHECK_NOTNULL(out_data_anchor);
  auto input_anchors = out_data_anchor->GetPeerInDataAnchors();
  for (auto &input_anchor : input_anchors) {
    auto output_node = input_anchor->GetOwnerNode();
    // Get the atomic input attr of the peer op; atomic_input_index[0] == -1 indicates that the
    // atomic address has already been assigned.
    vector<int64_t> atomic_input_index;
    (void)ge::AttrUtils::GetListInt(output_node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, atomic_input_index);
    if (!atomic_input_index.empty() && (atomic_input_index[0] == kAllInputAddrIsAtomic)) {
      is_mem_assigned = true;
      break;
    }
  }
  return SUCCESS;
}
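// Assigns atomic workspace memory for a single (non-fusion) node: every workspace index recorded in
// |workspace_info| gets the current mem_offset_, which is then advanced by the workspace size; the
// updated offsets are written back to the op desc's workspace vector.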
Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                                map<string, map<int64_t, int64_t>> &workspace_info,
                                                                vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign normal atomic memory, node = %s.", op_desc->GetName().c_str());
  vector<int64_t> workspace_vector = op_desc->GetWorkspace();
  for (auto iter = workspace_info.begin(); iter != workspace_info.end(); ++iter) {
    if (op_desc->GetName() != iter->first) {
      GELOGE(ge::PARAM_INVALID, "The node name %s and the node name %s in workspace info are inconsistent.",
             op_desc->GetName().c_str(), iter->first.c_str());
      return ge::PARAM_INVALID;
    }
    if (iter->second.empty()) {
      continue;
    }
    for (auto &info_iter : iter->second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      if (workspace_index >= workspace_vector.size()) {
        GELOGE(ge::PARAM_INVALID, "The workspace index %lu exceeds the size %zu of the workspace vector.",
               workspace_index, workspace_vector.size());
        return ge::PARAM_INVALID;
      }
      workspace_vector[workspace_index] = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] "
          "size[%ld] real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_,
          op_desc->GetStreamId(), workspace_size, workspace_size);
      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
    }
  }
  op_desc->SetWorkspace(workspace_vector);
  return SUCCESS;
}
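// Assigns atomic workspace memory for a fusion node: offsets are recorded per sub-node in the
// EXT_ATTR_ATOMIC_WORKSPACE_OFFSET ext attr instead of the op's own workspace vector.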
Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPtr &op_desc,
                                                              map<string, map<int64_t, int64_t>> &workspace_info,
                                                              vector<int64_t> &mem_offset_end) {
  GELOGI("Begin to reassign fusion atomic memory, node = %s.", op_desc->GetName().c_str());
  map<string, map<int64_t, int64_t>> sub_node_workspace_offset;
  for (auto &iter : workspace_info) {
    if (iter.second.empty()) {
      continue;
    }
    map<int64_t, int64_t> index_offset;
    for (auto &info_iter : iter.second) {
      auto workspace_index = static_cast<uint64_t>(info_iter.first);
      auto workspace_size = info_iter.second;
      size_t workspace_offset = memory_offset_[0].mem_offset_;
      GELOGI(
          "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] "
          "real_size[%ld].",
          compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, memory_offset_[0].mem_offset_,
          op_desc->GetStreamId(), workspace_size, workspace_size);
      memory_offset_[0].mem_offset_ += workspace_size;
      mem_offset_end.emplace_back(memory_offset_[0].mem_offset_);
      index_offset.insert(std::make_pair(workspace_index, workspace_offset));
    }
    sub_node_workspace_offset.insert(std::make_pair(iter.first, index_offset));
  }
  if (!(op_desc->SetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, sub_node_workspace_offset))) {
    GELOGE(FAILED, "Set EXT_ATTR_ATOMIC_WORKSPACE_OFFSET failed, op name:%s.", op_desc->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
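// Verifies that no input, output, or workspace offset anywhere in the graph is still kInvalidOffset
// after memory assignment.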
Status GraphMemoryAssigner::CheckOffset() {
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    GE_CHECK_NOTNULL(node->GetOpDesc());
    vector<int64_t> input_list = node->GetOpDesc()->GetInputOffset();
    for (auto input : input_list) {
      if (input == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s input: %ld.", node->GetName().c_str(), input);
        return FAILED;
      }
    }
    vector<int64_t> output_list = node->GetOpDesc()->GetOutputOffset();
    for (auto output : output_list) {
      if (output == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s output: %ld.", node->GetName().c_str(), output);
        return FAILED;
      }
    }
    vector<int64_t> workspace_list = node->GetOpDesc()->GetWorkspace();
    for (auto workspace : workspace_list) {
      if (workspace == ge::kInvalidOffset) {
        GELOGE(FAILED, "Invalid offset in node: %s workspace: %ld.", node->GetName().c_str(), workspace);
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
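// Logs the final total memory offset and refreshes the input offsets of every node in the graph from
// the output offsets of its upstream nodes.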
ge::Status GraphMemoryAssigner::SetInputOffset() {
  if (memory_offset_.empty()) {
    GELOGE(FAILED, "memory_offset_ is empty.");
    return FAILED;
  }
  GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu]", compute_graph_->GetName().c_str(),
          memory_offset_[0].mem_offset_);
  for (const ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    if (UpdateOpInputOffset(node) != ge::SUCCESS) {
      GELOGE(ge::FAILED, "Update op input offset failed.");
      return ge::FAILED;
    }
  }
  return ge::SUCCESS;
}
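// For a Data node of a known-shape (static) subgraph, resolves to the parent node's input;
// otherwise returns the node itself.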
NodePtr GraphMemoryAssigner::GetKnownInputNode(const NodePtr &node) const {
  if (!node->GetOpDesc()->HasAttr(ATTR_NAME_PARENT_NODE_INDEX)) {
    return node;
  }
  if (NodeUtils::IsDynamicShape(node)) {
    return node;
  }
  return NodeUtils::GetParentInput(node);
}
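// For a subgraph Data node: if its parent input is a constant, the constant's output offset is
// reused; otherwise, for a static-shape subgraph, the offset is inherited from the parent node's
// input list. Dynamic-shape subgraph Data has its memory allocated separately and is left untouched.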
ge::Status GraphMemoryAssigner::UpdateConstArgsOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  uint32_t parent_index = 0;
  if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
    return SUCCESS;
  }
  // Subgraph Data node: check for a constant input.
  std::string op_type;
  const auto &in_node = NodeUtils::GetParentInput(node);
  if (NodeUtils::GetConstOpType(in_node, op_type)) {
    input_list = in_node->GetOpDesc()->GetOutputOffset();
    node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as const output.
    return SUCCESS;                                  // Constant input.
  }
  // Memory is allocated separately for dynamic-shape subgraph Data.
  if (NodeUtils::IsDynamicShape(node)) {
    return SUCCESS;
  }
  const auto &owner = node->GetOwnerComputeGraph();
  const auto &parent_desc = owner->GetParentNode()->GetOpDesc();
  const auto parent_inputs = parent_desc->GetInputOffset();
  if (parent_inputs.size() <= parent_index) {
    GELOGE(FAILED, "Get parent input offset failed, node: %s, input size: %zu, parent index: %u",
           node->GetName().c_str(), parent_inputs.size(), parent_index);
    return FAILED;
  }
  input_list = {parent_inputs[parent_index]};
  node->GetOpDesc()->SetOutputOffset(input_list);  // Set Data output same as parent input.
  return SUCCESS;
}
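// Generic path: collects the input offsets of |node| from the output offsets of its peer nodes. When
// the op carries ATTR_NAME_INPUT_MEM_TYPE_LIST (fusion case), L1 inputs keep their original offset
// while other inputs add the peer output offset; constant inputs take the data offset from the
// tensor desc instead.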
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<int64_t> &input_list) const {
  vector<int64_t> origin_input_list;
  vector<int64_t> memory_type;
  auto tmp_op_desc = node->GetOpDesc();
  origin_input_list = tmp_op_desc->GetInputOffset();
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(tmp_op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
  for (const auto &anchor : node->GetAllInDataAnchors()) {
    vector<int64_t> output_list;
    auto peer_out_anchor = anchor->GetPeerOutAnchor();
    if (peer_out_anchor == nullptr) {
      continue;
    }
    // For a non-broadcast node, the OutputOffset of the previous node is used to update input_list.
    auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
    auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
    GE_CHECK_NOTNULL(last_peer_out_op_desc);
    output_list = last_peer_out_op_desc->GetOutputOffset();
    if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
      auto input_index = anchor->GetIdx();
      if (has_mem_type_attr) {
        auto input_size = tmp_op_desc->GetInputsSize();
        auto ori_input_offset_list_size = origin_input_list.size();
        auto mem_type_size = memory_type.size();
        if ((input_size != mem_type_size) || (input_size != ori_input_offset_list_size)) {
          GELOGE(ge::FAILED,
                 "fusion: node[%s] input_size[%zu] differs from memory_type_size[%zu]"
                 " or ori_input_offset_list_size[%zu]",
                 tmp_op_desc->GetName().c_str(), input_size, mem_type_size, ori_input_offset_list_size);
          return ge::FAILED;
        }
        // Non-HBM (L1) inputs keep the original input offset;
        // HBM inputs: input offset = original input offset + peer output offset.
        input_list.emplace_back(memory_type[input_index] == RT_MEMORY_L1
                                    ? origin_input_list[input_index]
                                    : origin_input_list[input_index] + output_list.at(peer_out_anchor->GetIdx()));
        GELOGI("fusion: node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]",
               tmp_op_desc->GetName().c_str(), input_index,
               peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
               input_list.back());
      } else {
        int64_t output_offset = output_list.at(peer_out_anchor->GetIdx());
        const auto &in_node = GetKnownInputNode(peer_out_anchor->GetOwnerNode());
        if (in_node->GetType() == CONSTANT) {
          GeTensorDesc tensor_desc = tmp_op_desc->GetInputDesc(input_index);
          GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, output_offset));
        }
        GELOGI("node[%s] input[%d] is set from node[%s] out index[%d] offset[%ld]", tmp_op_desc->GetName().c_str(),
               input_index, peer_out_anchor->GetOwnerNode()->GetOpDesc()->GetName().c_str(), peer_out_anchor->GetIdx(),
               output_offset);
        input_list.emplace_back(output_offset);
      }
    }
  }
  return ge::SUCCESS;
}
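// Dispatches input-offset updating by node type: broadcast nodes reuse their own InputOffset for
// variable inputs (already set in AssignVarAttr2Nodes), Data nodes go through UpdateConstArgsOffset,
// and all other nodes use the generic overload above.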
ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const {
  GE_CHECK_NOTNULL(node->GetOpDesc());
  vector<int64_t> input_list;
  if (node->GetType() == HCOMBROADCAST || node->GetType() == HVDCALLBACKBROADCAST) {
    for (const auto &anchor : node->GetAllInDataAnchors()) {
      vector<int64_t> output_list;
      auto peer_out_anchor = anchor->GetPeerOutAnchor();
      if (peer_out_anchor == nullptr) {
        continue;
      }
      auto last_peer_out_node = peer_out_anchor->GetOwnerNode();
      // If the current node is a broadcast node and the preceding node is a variable, the broadcast
      // node's own InputOffset (already set in AssignVarAttr2Nodes) is used to update input_list.
      // Otherwise, the OutputOffset of the previous node is used.
      if (last_peer_out_node->GetType() != VARIABLE) {
        auto last_peer_out_op_desc = last_peer_out_node->GetOpDesc();
        GE_CHECK_NOTNULL(last_peer_out_op_desc);
        output_list = last_peer_out_op_desc->GetOutputOffset();
        if (output_list.size() > static_cast<size_t>(peer_out_anchor->GetIdx())) {
          input_list.emplace_back(output_list.at(peer_out_anchor->GetIdx()));
        }
      } else {
        vector<int64_t> cur_node_input_list;
        auto cur_node_op_desc = node->GetOpDesc();
        GE_CHECK_NOTNULL(cur_node_op_desc);
        cur_node_input_list = cur_node_op_desc->GetInputOffset();
        if (cur_node_input_list.size() > static_cast<size_t>(anchor->GetIdx())) {
          input_list.emplace_back(cur_node_input_list.at(anchor->GetIdx()));
        }
      }
    }
  } else if (node->GetType() == DATA_TYPE) {
    if (UpdateConstArgsOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update data: %s args offset failed.", node->GetName().c_str());
      return FAILED;
    }
  } else {
    if (UpdateOpInputOffset(node, input_list) != SUCCESS) {
      GELOGE(FAILED, "Update node: %s input offset failed.", node->GetName().c_str());
      return FAILED;
    }
  }
  node->GetOpDesc()->SetInputOffset(input_list);
  return SUCCESS;
}
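// Converts the per-output end offsets produced during atomic assignment into (start, size) pairs and
// hands them to the connected ATOMICADDRCLEAN node via SetAtomicCleanAttr.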
Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start,
                                                     const vector<int64_t> &mem_offset_end) {
  GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start);
  // Parse the start-offset and size vectors: each segment starts where the previous one ends, so the
  // size of segment i is the difference between consecutive start offsets.
  vector<int64_t> memory_offset_start;
  vector<int64_t> memory_offset_size;
  memory_offset_start.emplace_back(atomic_mem_start);
  for (size_t i = 0; i < mem_offset_end.size(); ++i) {
    memory_offset_start.emplace_back(mem_offset_end[i]);
    auto size = memory_offset_start[i + 1] - memory_offset_start[i];
    memory_offset_size.emplace_back(size);
  }
  memory_offset_start.pop_back();
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (!memory_offset_size.empty() && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.",
             memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) {
          GELOGE(FAILED, "Set atomic clean attr failed.");
          return FAILED;
        }
      }
    }
  }
  return SUCCESS;
}
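// Loop-graph variant: clears the whole atomic range [atomic_mem_start, mem_offset_) as a single
// (start, size) pair attached to the connected ATOMICADDRCLEAN node.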
Status GraphMemoryAssigner::SetLoopGraphAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start) {
  // Set the address attr of the atomic clean operator for a loop graph.
  int64_t atomic_mem_size = memory_offset_[0].mem_offset_ - atomic_mem_start;
  GELOGI("SetLoopGraphAtomicAttr begin, atomic_addr_clean start is %ld, mem_size is %ld, mem_offset is %zu.",
         atomic_mem_start, atomic_mem_size, memory_offset_[0].mem_offset_);
  const auto &in_control_anchor = node->GetInControlAnchor();
  if (atomic_mem_size != 0 && in_control_anchor != nullptr) {
    for (auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) {
      if (peer_out_control_anchor == nullptr) {
        continue;
      }
      auto peer_out_node = peer_out_control_anchor->GetOwnerNode();
      auto peer_out_node_desc = peer_out_node->GetOpDesc();
      if (peer_out_node_desc == nullptr) {
        continue;
      }
      GELOGD("SetLoopGraphAtomicAttr, node is %s, op type is %s.", peer_out_node_desc->GetName().c_str(),
             peer_out_node_desc->GetType().c_str());
      if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) {
        GE_CHK_STATUS_EXEC(SetAtomicCleanAttr(peer_out_node, {atomic_mem_start}, {atomic_mem_size}),
                           GELOGE(FAILED, "SetAtomicCleanAttr failed.");
                           return FAILED);
      }
    }
  }
  return SUCCESS;
}
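// Appends the given start/size lists to the workspace vectors and to the ATTR_NAME_AUTOMIC_ADD_START
// / ATTR_NAME_AUTOMIC_ADD_MEM_SIZE attrs of the target atomic-clean node, or of every ATOMICADDRCLEAN
// node when |n| is null.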
ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &n, const vector<int64_t> &atomic_mem_start,
                                                   const vector<int64_t> &atomic_mem_size) {
  for (ge::NodePtr &node : compute_graph_->GetAllNodes()) {
    auto node_op_desc = node->GetOpDesc();
    GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
    if (((n != nullptr) && (node->GetName() == n->GetName())) ||
        ((n == nullptr) && (node_op_desc->GetType() == ATOMICADDRCLEAN))) {
      vector<int64_t> workspace_vector = node_op_desc->GetWorkspace();
      vector<int64_t> workspace_byte_vector = node_op_desc->GetWorkspaceBytes();
      workspace_vector.insert(workspace_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
      workspace_byte_vector.insert(workspace_byte_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
      node_op_desc->SetWorkspace(workspace_vector);
      node_op_desc->SetWorkspaceBytes(workspace_byte_vector);
      std::vector<int64_t> mem_start_vector;
      // If GetListInt fails, mem_start_vector is empty.
      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector);
      mem_start_vector.insert(mem_start_vector.end(), atomic_mem_start.begin(), atomic_mem_start.end());
      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_START, mem_start_vector),
                       GELOGE(FAILED, "SetListInt failed.");
                       return FAILED);
      std::vector<int64_t> mem_size_vector;
      // If GetListInt fails, mem_size_vector is empty.
      (void)ge::AttrUtils::GetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector);
      mem_size_vector.insert(mem_size_vector.end(), atomic_mem_size.begin(), atomic_mem_size.end());
      GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(node_op_desc, ATTR_NAME_AUTOMIC_ADD_MEM_SIZE, mem_size_vector),
                       GELOGE(FAILED, "SetListInt failed.");
                       return FAILED);
      // Format the start/size lists for logging.
      std::stringstream ss;
      for (auto iter : atomic_mem_start) {
        ss << iter << " ";
      }
      string atomic_mem_start_str = ss.str();
      ss.clear();
      ss.str("");
      for (auto iter : atomic_mem_size) {
        ss << iter << " ";
      }
      string atomic_mem_size_str = ss.str();
      GELOGI("[IMAS]SetAtomicCleanAttr : Set graph[%s] atomic_node[%s] output offset [%s] size[%s] streamid[%ld]",
             node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(),
             atomic_mem_start_str.c_str(), atomic_mem_size_str.c_str(), node->GetOpDesc()->GetStreamId());
    }
  }
  return SUCCESS;
}
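// Rounds mem_offset_ up to the next multiple of mem_align_size. For example, assuming
// mem_align_size = 512 (the value typically passed in as MEM_ALIGN_SIZE), an offset of 1000 becomes
// (1000 + 511) / 512 * 512 = 1024.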
void GraphMemoryAssigner::AlignMemOffset(const int64_t &mem_align_size) {
  if (mem_align_size <= 0) {
    return;
  }
  memory_offset_[0].mem_offset_ =
      (memory_offset_[0].mem_offset_ + mem_align_size - 1) / mem_align_size * mem_align_size;
}
}  // namespace ge

The Graph Engine (GE) module is a submodule of MindSpore. Implemented in C++, it sits between the front-end module ME and the underlying hardware and serves as the bridge between them. GE takes the graph delivered by ME as input, applies a series of deep graph-optimization passes, and finally outputs a graph that runs efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training/inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture is shown in the diagram below.