You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

data_dumper.cc 39 kB

5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/model_manager/data_dumper.h"
  17. #include <cstdlib>
  18. #include <ctime>
  19. #include <map>
  20. #include <utility>
  21. #include <vector>
  22. #include "common/debug/memory_dumper.h"
  23. #include "common/properties_manager.h"
  24. #include "framework/common/util.h"
  25. #include "framework/common/debug/ge_log.h"
  26. #include "framework/common/util.h"
  27. #include "graph/anchor.h"
  28. #include "graph/debug/ge_attr_define.h"
  29. #include "graph/load/model_manager/model_utils.h"
  30. #include "graph/manager/util/debug.h"
  31. #include "graph/utils/attr_utils.h"
  32. #include "graph/utils/tensor_utils.h"
  33. #include "proto/dump_task.pb.h"
  34. #include "proto/ge_ir.pb.h"
  35. #include "proto/op_mapping.pb.h"
  36. #include "runtime/base.h"
  37. #include "runtime/mem.h"
  38. namespace {
  39. const uint32_t kAicpuLoadFlag = 1;
  40. const uint32_t kAicpuUnloadFlag = 0;
  41. const int64_t kOpDebugSize = 2048;
  42. const int64_t kOpDebugShape = 2048;
  43. const int8_t kDecimal = 10;
  44. const uint32_t kAddrLen = sizeof(void *);
  45. const char *const kDumpOutput = "output";
  46. const char *const kDumpInput = "input";
  47. const char *const kDumpAll = "all";
  48. // parse for format like nodename:input:index
  49. static bool ParseNameIndex(const std::string &node_name_index, std::string &node_name, std::string &input_or_output,
  50. size_t &index) {
  51. auto sep = node_name_index.rfind(':');
  52. if (sep == std::string::npos) {
  53. return false;
  54. }
  55. auto index_str = node_name_index.substr(sep + 1);
  56. index = static_cast<size_t>(std::strtol(index_str.c_str(), nullptr, kDecimal));
  57. auto node_name_without_index = node_name_index.substr(0, sep);
  58. sep = node_name_without_index.rfind(':');
  59. if (sep == std::string::npos) {
  60. return false;
  61. }
  62. node_name = node_name_without_index.substr(0, sep);
  63. input_or_output = node_name_without_index.substr(sep + 1);
  64. return !(input_or_output != kDumpInput && input_or_output != kDumpOutput);
  65. }
  66. static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) {
  67. return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
  68. }
  69. } // namespace
  70. static int32_t GetIrDataType(ge::DataType data_type) {
  71. static const std::map<ge::DataType, ge::proto::DataType> data_type_map = {
  72. {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
  73. {ge::DT_FLOAT, ge::proto::DT_FLOAT},
  74. {ge::DT_FLOAT16, ge::proto::DT_FLOAT16},
  75. {ge::DT_INT8, ge::proto::DT_INT8},
  76. {ge::DT_UINT8, ge::proto::DT_UINT8},
  77. {ge::DT_INT16, ge::proto::DT_INT16},
  78. {ge::DT_UINT16, ge::proto::DT_UINT16},
  79. {ge::DT_INT32, ge::proto::DT_INT32},
  80. {ge::DT_INT64, ge::proto::DT_INT64},
  81. {ge::DT_UINT32, ge::proto::DT_UINT32},
  82. {ge::DT_UINT64, ge::proto::DT_UINT64},
  83. {ge::DT_BOOL, ge::proto::DT_BOOL},
  84. {ge::DT_DOUBLE, ge::proto::DT_DOUBLE},
  85. {ge::DT_DUAL, ge::proto::DT_DUAL},
  86. {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8},
  87. {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8},
  88. {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64},
  89. {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128},
  90. {ge::DT_QINT8, ge::proto::DT_QINT8},
  91. {ge::DT_QINT16, ge::proto::DT_QINT16},
  92. {ge::DT_QINT32, ge::proto::DT_QINT32},
  93. {ge::DT_QUINT8, ge::proto::DT_QUINT8},
  94. {ge::DT_QUINT16, ge::proto::DT_QUINT16},
  95. {ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
  96. {ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
  97. {ge::DT_STRING, ge::proto::DT_STRING},
  98. {ge::DT_VARIANT, ge::proto::DT_VARIANT},
  99. };
  100. auto iter = data_type_map.find(data_type);
  101. if (iter == data_type_map.end()) {
  102. return static_cast<int32_t>(ge::proto::DT_UNDEFINED);
  103. }
  104. return static_cast<int32_t>(iter->second);
  105. }
  106. namespace ge {
// Release any device buffers still held for the serialized load/unload
// OpMappingInfo protos (allocated in ExecuteLoadDumpInfo / ExecuteUnLoadDumpInfo).
DataDumper::~DataDumper() {
  ReleaseDevMem(&dev_mem_load_);
  ReleaseDevMem(&dev_mem_unload_);
}
  111. void DataDumper::ReleaseDevMem(void **ptr) noexcept {
  112. if (ptr == nullptr) {
  113. return;
  114. }
  115. if (*ptr != nullptr) {
  116. rtError_t rt_ret = rtFree(*ptr);
  117. if (rt_ret != RT_ERROR_NONE) {
  118. GELOGE(RT_FAILED, "[Call][RtFree] failed, ret:0x%X", rt_ret);
  119. }
  120. *ptr = nullptr;
  121. }
  122. }
// Record the device addresses of the global-step, iterations-per-loop and
// loop-condition variables; SetOpMappingLoopAddr later copies any non-zero
// address into the OpMappingInfo proto.
void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) {
  global_step_ = reinterpret_cast<uintptr_t>(global_step);
  loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter);
  loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
}
// For every downstream consumer of |node|'s data outputs, record which of the
// consumer's input anchors is fed by which of |node|'s output anchors.
// SaveDumpTask later consults input_map_ (keyed by consumer name) to emit
// extra dump records for the feeding op. A null node is silently ignored.
void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
  if (node != nullptr) {
    auto input_op_desc = node->GetOpDesc();
    if (input_op_desc == nullptr) {
      GELOGE(PARAM_INVALID, "[Get][OpDesc] input op desc is null.");
      return;
    }
    // Walk each out data anchor and every peer in-anchor it drives.
    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      for (auto &dst_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
        ge::NodePtr dst_node = dst_in_data_anchor->GetOwnerNode();
        auto op_desc = dst_node->GetOpDesc();
        if (op_desc == nullptr) {
          GELOGE(PARAM_INVALID, "[Get][OpDesc] input op desc is null.");
          return;
        }
        // One entry per edge; a consumer may appear under its name multiple
        // times (SaveDumpTask reads them back with equal_range).
        input_map_.insert(
            {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
      }
    }
  }
}
// Record the task/stream ids of the end-of-graph task.
void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) {
  end_graph_task_id_ = task_id;
  end_graph_stream_id_ = stream_id;
}
// Record op-debug state: the debug task's task/stream ids, the device buffer
// that receives debug data, and whether op-debug is enabled.
void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug) {
  op_debug_task_id_ = task_id;
  op_debug_stream_id_ = stream_id;
  op_debug_addr_ = op_debug_addr;
  is_op_debug_ = is_op_debug;
}
// Queue a dump record for |op_desc|'s task and, in addition, one non-task
// record for every feeding op registered against this op's name via
// SaveDumpInput. |args| is the device address of the task's argument buffer.
// Errors are logged and abort the remaining input records.
void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
                              uintptr_t args) {
  if (op_desc == nullptr) {
    GELOGE(PARAM_INVALID, "[Check][Param] Opdesc is nullptr");
    return;
  }
  GELOGI("Save dump task %s, task id: %u, stream id: %u", op_desc->GetName().c_str(), task_id, stream_id);
  // is_task = true: this entry is backed by a real device task.
  op_list_.push_back({task_id, stream_id, op_desc, args, true});
  // Emit an extra record (is_task = false) per registered input mapping.
  for (auto iter = input_map_.equal_range(op_desc->GetName()); iter.first != iter.second; ++iter.first) {
    InnerInputMapping &inner_input_mapping = iter.first->second;
    auto &data_op = inner_input_mapping.data_op;
    if (data_op == nullptr) {
      GELOGE(PARAM_INVALID, "[Check][Param] data_op is null.");
      return;
    }
    auto input_tensor = op_desc->GetInputDescPtr(inner_input_mapping.input_anchor_index);
    if (input_tensor == nullptr) {
      GELOGE(PARAM_INVALID, "[Get][InputDescPtr] input_tensor in op:%s is null, index:%d, size:%zu.",
             op_desc->GetName().c_str(), inner_input_mapping.input_anchor_index, op_desc->GetInputsSize());
      return;
    }
    int64_t data_size = 0;
    // Prefer the AIPP origin-size attribute; otherwise compute from the tensor desc.
    if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
      GELOGI("Get aipp data size according to attr is %ld", data_size);
    } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "[Get][InputSize] failed in %s, index:%u",
             op_desc->GetName().c_str(), inner_input_mapping.input_anchor_index);
      return;
    }
    GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id,
           stream_id, data_size);
    op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
  }
}
// Copy any loop-control variable addresses that were actually set (non-zero)
// into the OpMappingInfo proto; an address of 0 means "not configured".
static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
                                 toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (step_id != 0) {
    GELOGI("step_id exists.");
    op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
  }
  if (loop_per_iter != 0) {
    GELOGI("loop_per_iter exists.");
    op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
  }
  if (loop_cond != 0) {
    GELOGI("loop_cond exists.");
    op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
  }
}
  209. Status DataDumper::GenerateOutput(toolkit::aicpu::dump::Output &output,
  210. const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
  211. const uintptr_t &addr, size_t index) {
  212. output.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  213. output.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  214. for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
  215. output.mutable_shape()->add_dim(dim);
  216. }
  217. for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
  218. output.mutable_origin_shape()->add_dim(dim);
  219. }
  220. int64_t output_size = 0;
  221. if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
  222. REPORT_CALL_ERROR("E19999", "Get tensor size fail");
  223. GELOGE(PARAM_INVALID, "[Get][OutputSize] failed");
  224. return PARAM_INVALID;
  225. }
  226. GELOGD("Get output size in dump is %ld", output_size);
  227. std::string origin_name;
  228. int32_t origin_output_index = -1;
  229. (void)AttrUtils::GetStr(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  230. (void)AttrUtils::GetInt(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  231. output.set_size(output_size);
  232. output.set_original_name(origin_name);
  233. output.set_original_output_index(origin_output_index);
  234. output.set_original_output_format(static_cast<int32_t>(tensor_descs.at(index).GetOriginFormat()));
  235. output.set_original_output_data_type(static_cast<int32_t>(tensor_descs.at(index).GetOriginDataType()));
  236. output.set_address(static_cast<uint64_t>(addr));
  237. return SUCCESS;
  238. }
// Handle an output desc redirected by ATTR_DATA_DUMP_REF ("node:in|out:idx"):
// resolve the referenced node in compute_graph_, look up its saved base
// address in ref_info_, and generate the dump Output from the referenced
// node's tensor desc and offset instead of this op's own.
Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info,
                                 toolkit::aicpu::dump::Output &output,
                                 size_t i, const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // parser and find which node's input or output tensor desc is chosen for dump info
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "[Check][Param] Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "[Check][Param] Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF "
                                       "attr[%s], cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  // ref_info_ maps an op desc to the base device address saved for it.
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "[Check][Param] Op [%s] output desc[%zu] cannot find "
                                       "any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    // Input addresses come first in the referenced node's args buffer.
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index),
                      "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index);
  } else if (input_or_output == kDumpOutput) {
    // Output addresses follow all of the referenced node's inputs.
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index),
                      "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index);
  }
  GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}
// Build the dump Output entries for a task-backed op (TBE / AICPU kernels).
// Per output desc: skip ALWAYS_EMPTY optional outputs, honor an
// ATTR_DATA_DUMP_REF redirection, record only an OpBuffer for L1-resident
// outputs, and otherwise compute the address from the args buffer layout
// (inputs first, then outputs).
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, toolkit::aicpu::dump::Task &task) {
  const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
  // When the memory-type list exists it must cover every output desc.
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()),
                                       "[Check][Param] DumpOutputWithTask[%s], output size[%zu], "
                                       "output memory type size[%zu]", inner_dump_info.op->GetName().c_str(),
                                       output_descs.size(), v_memory_type.size());
  size_t no_need_dump_output_num = 0;
  for (size_t i = 0; i < output_descs.size(); ++i) {
    toolkit::aicpu::dump::Output output;
    std::string node_name_index;
    const auto &output_desc = output_descs.at(i);
    int32_t calc_type = 0;
    bool has_calc_type = ge::AttrUtils::GetInt(output_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type);
    if (has_calc_type && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) {
      // Optional output that is never materialized: nothing to dump.
      GELOGD("Node[%s] output[index:%zu] [name:%s] is an optional output, don't need to dump this output.",
             inner_dump_info.op->GetName().c_str(), i, output_desc.GetName().c_str());
      ++no_need_dump_output_num;
      continue;
    }
    // Sanity check: the dumpable output count must not fall below the number
    // of real output addresses. NOTE(review): the log text describes the
    // inverse relation; the condition itself looks intentional — confirm.
    if (output_descs.size() - no_need_dump_output_num < output_addrs.size()) {
      REPORT_INNER_ERROR("E19999", "The number of output does not match in op:%s(%s). The size[%zu] of output which is "
                         "no need to dump should not greater than the size[%zu] of output descs minus the size[%zu] of "
                         "output which is need to dump.", inner_dump_info.op->GetName().c_str(),
                         inner_dump_info.op->GetType().c_str(), no_need_dump_output_num, output_descs.size(),
                         output_addrs.size());
      GELOGE(PARAM_INVALID, "[Check][Param] The number of output does not match in op:%s(%s). The size[%zu] of output "
             "which is no need to dump should not greater than the size[%zu] of output descs minus the size[%zu] "
             "of output which is need to dump.", inner_dump_info.op->GetName().c_str(),
             inner_dump_info.op->GetType().c_str(), no_need_dump_output_num, output_descs.size(), output_addrs.size());
      return PARAM_INVALID;
    }
    // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF
    if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "[Dump][RefOutput] failed");
      task.mutable_output()->Add(std::move(output));
    } else {
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        // L1-fusion output: no dumpable address, record only its size.
        GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i);
        int64_t output_size = 0;
        if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
          REPORT_CALL_ERROR("E19999", "Get output tensor size fail in op:%s(%s), index:%zu",
                            inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i);
          GELOGE(PARAM_INVALID, "[Get][OutputSize] failed in %s, index:%zu", inner_dump_info.op->GetName().c_str(), i);
          return PARAM_INVALID;
        }
        GELOGI("Get output size of l1_fusion_dump is %ld", output_size);
        GenerateOpBuffer(output_size, task);
      } else {
        // args buffer layout: all input addresses first, then the outputs.
        const auto input_size = inner_dump_info.op->GetInputsSize();
        auto addr = inner_dump_info.args + (i + input_size) * kAddrLen;
        GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i),
                          "[Generate][Output] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), i);
        task.mutable_output()->Add(std::move(output));
      }
    }
  }
  return SUCCESS;
}
// Build the dump Output entries for one InnerDumpInfo. Task-backed ops are
// delegated to DumpOutputWithTask; otherwise (data / const / variable record
// created by SaveDumpTask) a single Output is built from the saved anchor
// index, dims and data size.
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, toolkit::aicpu::dump::Task &task) {
  GELOGI("Start dump output");
  if (inner_dump_info.is_task) {
    // tbe or aicpu op, these ops are with task
    return DumpOutputWithTask(inner_dump_info, task);
  }
  // else data, const or variable op
  toolkit::aicpu::dump::Output output;
  auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
  if (output_tensor == nullptr) {
    REPORT_INNER_ERROR("E19999", "output_desc tensor is nullptr in op:%s(%s), index:%u, check invalid",
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(),
                       inner_dump_info.output_anchor_index);
    GELOGE(PARAM_INVALID, "[Get][OutputDescPtr] output_tensor is null in op:%s, index:%d, size:%zu.",
           inner_dump_info.op->GetName().c_str(), inner_dump_info.output_anchor_index,
           inner_dump_info.op->GetOutputsSize());
    return PARAM_INVALID;
  }
  output.set_data_type(static_cast<int32_t>(GetIrDataType(output_tensor->GetDataType())));
  output.set_format(static_cast<int32_t>(output_tensor->GetFormat()));
  // Shape comes from the dims saved at SaveDumpTask time, not the desc.
  for (auto dim : inner_dump_info.dims) {
    output.mutable_shape()->add_dim(dim);
  }
  std::string origin_name;
  int32_t origin_output_index = -1;
  (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  // Size also comes from the value computed in SaveDumpTask.
  output.set_size(inner_dump_info.data_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
  output.set_original_output_data_type(static_cast<int32_t>(output_tensor->GetOriginDataType()));
  // due to lhisi virtual addr bug, cannot use args now
  if (inner_dump_info.output_anchor_index >= static_cast<int>(output_addrs.size())) {
    REPORT_INNER_ERROR("E19999", "output_anchor_index:%u >= output addr size:%zu in op:%s(%s), "
                       "check invalid", inner_dump_info.output_anchor_index, output_addrs.size(),
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str());
    GELOGE(FAILED, "[Check][Param] output_anchor_index:%u >= output addr size:%zu in op:%s(%s)",
           inner_dump_info.output_anchor_index, output_addrs.size(),
           inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str());
    return FAILED;
  }
  // NOTE(review): the address is offset by input_anchor_index (not
  // output_anchor_index) — presumably because the consumer's args buffer is
  // indexed by its input slot for these feeding ops; confirm against callers.
  auto data_addr = inner_dump_info.args + kAddrLen * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
  output.set_address(static_cast<uint64_t>(data_addr));
  task.mutable_output()->Add(std::move(output));
  return SUCCESS;
}
  392. Status DataDumper::GenerateInput(toolkit::aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
  393. const uintptr_t &addr, size_t index) {
  394. input.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  395. input.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  396. for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
  397. input.mutable_shape()->add_dim(dim);
  398. }
  399. for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
  400. input.mutable_origin_shape()->add_dim(dim);
  401. }
  402. int64_t input_size = 0;
  403. if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
  404. GELOGI("Get aipp input size according to attr is %ld", input_size);
  405. } else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) {
  406. REPORT_CALL_ERROR("E19999", "Get tensor size fail");
  407. GELOGE(PARAM_INVALID, "[Get][TensorSize] failed");
  408. return PARAM_INVALID;
  409. }
  410. GELOGD("Get input size in dump is %ld", input_size);
  411. input.set_size(input_size);
  412. input.set_address(static_cast<uint64_t>(addr));
  413. return SUCCESS;
  414. }
// Handle an input desc redirected by ATTR_DATA_DUMP_REF ("node:in|out:idx"):
// resolve the referenced node in compute_graph_, look up its saved base
// address in ref_info_, and generate the dump Input from the referenced
// node's tensor desc and offset (mirror of DumpRefOutput).
Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, toolkit::aicpu::dump::Input &input,
                                size_t i, const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // parser and find which node's input or output tensor desc is chosen for dump info
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "[Call][ParseNameIndex] Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "[Check][Param] Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF "
                                       "attr[%s], cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  // ref_info_ maps an op desc to the base device address saved for it.
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "[Check][Param] Op [%s] input desc[%zu] cannot find "
                                       "any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    // Input addresses come first in the referenced node's args buffer.
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index),
                      "[Generate][Input] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index);
  } else if (input_or_output == kDumpOutput) {
    // Output addresses follow all of the referenced node's inputs.
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index),
                      "[Generate][Input] failed for %s, index:%zu", inner_dump_info.op->GetName().c_str(), index);
  }
  GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}
// Build the dump Input entries for one task. Per input desc: honor an
// ATTR_DATA_DUMP_REF redirection, record only an OpBuffer for L1-resident
// inputs, and otherwise compute the address from the args buffer (inputs are
// laid out first, one pointer per anchor).
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, toolkit::aicpu::dump::Task &task) {
  GELOGI("Start dump input");
  const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
  if (input_descs.size() != input_addrs.size()) {
    REPORT_INNER_ERROR("E19999", "input_desc size:%zu != input addr size:%zu in op:%s(%s)",
                       input_descs.size(), input_addrs.size(),
                       inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str());
    GELOGE(PARAM_INVALID, "[Check][Param] Invalid input desc addrs size %zu, op %s has %zu input desc.",
           input_addrs.size(), inner_dump_info.op->GetName().c_str(), input_descs.size());
    return PARAM_INVALID;
  }
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
  // When the memory-type list exists it must cover every input desc.
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()),
                                       "[Check][Param] DumpInput[%s], input size[%zu], input memory type size[%zu]",
                                       inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size());
  for (size_t i = 0; i < input_descs.size(); ++i) {
    toolkit::aicpu::dump::Input input;
    std::string node_name_index;
    // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF
    if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index),
                        "[Dump][RefInput] failed, node name index:%s", node_name_index.c_str());
      task.mutable_input()->Add(std::move(input));
      // normal dump without attr
    } else {
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        // L1-fusion input: no dumpable address, record only its size.
        GELOGI("[L1Fusion] DumpInput[%s] input[%zu] is l1 addr", inner_dump_info.op->GetName().c_str(), i);
        int64_t input_size = 0;
        // Prefer the AIPP origin-size attribute; otherwise compute from desc.
        if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
          GELOGI("Get aipp input size according to attr is %ld", input_size);
        } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
          REPORT_CALL_ERROR("E19999", "Get input tensor size fail in op:%s(%s), index:%zu",
                            inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i);
          GELOGE(PARAM_INVALID, "[Get][InputTensorSize] fail in op:%s(%s), index:%zu",
                 inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i);
          return PARAM_INVALID;
        }
        GELOGI("Get input size of l1_fusion_dump is %ld", input_size);
        GenerateOpBuffer(input_size, task);
      } else {
        // i-th input pointer in the args buffer.
        auto addr = inner_dump_info.args + kAddrLen * i;
        GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i),
                          "[Generate][Input] failed for op:%s, index:%zu", inner_dump_info.op->GetName().c_str(), i);
        task.mutable_input()->Add(std::move(input));
      }
    }
  }
  return SUCCESS;
}
  509. void DataDumper::GenerateOpBuffer(const int64_t &size, toolkit::aicpu::dump::Task &task) {
  510. toolkit::aicpu::dump::OpBuffer op_buffer;
  511. op_buffer.set_buffer_type(toolkit::aicpu::dump::BufferType::L1);
  512. op_buffer.set_address(reinterpret_cast<uintptr_t>(l1_fusion_addr_));
  513. op_buffer.set_size(size);
  514. task.mutable_buffer()->Add(std::move(op_buffer));
  515. }
// Serialize |op_mapping_info|, copy it into a freshly allocated device buffer
// (dev_mem_load_, released by the destructor or on reuse) and hand it to the
// runtime via rtDatadumpInfoLoad. Sets load_flag_ on success.
Status DataDumper::ExecuteLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
    GELOGE(PARAM_INVALID, "[Call][SerializeToString] failed, proto size %zu.", proto_size);
    return PARAM_INVALID;
  }
  // Free any buffer from a previous load before allocating a new one.
  if (dev_mem_load_ != nullptr) {
    GELOGW("dev_mem_load_ has been used.");
    ReleaseDevMem(&dev_mem_load_);
  }
  rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", proto_size, rt_ret);
    GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", proto_size, rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size)
  // On rtMemcpy/rtDatadumpInfoLoad failure dev_mem_load_ is intentionally
  // kept; the destructor's ReleaseDevMem reclaims it.
  rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", proto_size, rt_ret);
    GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", proto_size, rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, length:%zu, ret:0x%X", proto_size, rt_ret);
    GELOGE(RT_FAILED, "[Call][RtDatadumpInfoLoad] failed, length:%zu, ret:0x%X", proto_size, rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  load_flag_ = true;
  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}
  552. Status DataDumper::ExecuteUnLoadDumpInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  553. std::string proto_str;
  554. size_t proto_size = op_mapping_info.ByteSizeLong();
  555. bool ret = op_mapping_info.SerializeToString(&proto_str);
  556. if (!ret || proto_size == 0) {
  557. REPORT_INNER_ERROR("E19999", "Serialize proto to string fail");
  558. GELOGE(PARAM_INVALID, "[Call][SerializeToString] failed, proto size %zu.", proto_size);
  559. return PARAM_INVALID;
  560. }
  561. if (dev_mem_unload_ != nullptr) {
  562. GELOGW("dev_mem_unload_ has been used.");
  563. ReleaseDevMem(&dev_mem_unload_);
  564. }
  565. rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
  566. if (rt_ret != RT_ERROR_NONE) {
  567. REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X", proto_size, rt_ret);
  568. GELOGE(RT_FAILED, "[Call][RtMalloc] failed, size:%zu, ret:0x%X", proto_size, rt_ret);
  569. return RT_ERROR_TO_GE_STATUS(rt_ret);
  570. }
  571. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size)
  572. rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  573. if (rt_ret != RT_ERROR_NONE) {
  574. REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X", proto_size, rt_ret);
  575. GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, size:%zu, ret:0x%X", proto_size, rt_ret);
  576. return RT_ERROR_TO_GE_STATUS(rt_ret);
  577. }
  578. rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
  579. if (rt_ret != RT_ERROR_NONE) {
  580. REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, length:%zu, ret:0x%X", proto_size, rt_ret);
  581. GELOGE(RT_FAILED, "[Call][RtDatadumpInfoLoad] failed, length:%zu, ret:0x%X", proto_size, rt_ret);
  582. return RT_ERROR_TO_GE_STATUS(rt_ret);
  583. }
  584. load_flag_ = false;
  585. GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
  586. return SUCCESS;
  587. }
  588. Status DataDumper::LoadDumpInfo() {
  589. std::string dump_list_key;
  590. PrintCheckLog(dump_list_key);
  591. if (op_list_.empty()) {
  592. GELOGD("op_list_ is empty");
  593. }
  594. toolkit::aicpu::dump::OpMappingInfo op_mapping_info;
  595. auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id_) + "/";
  596. op_mapping_info.set_dump_path(dump_path);
  597. op_mapping_info.set_model_name(dump_list_key);
  598. op_mapping_info.set_model_id(model_id_);
  599. op_mapping_info.set_flag(kAicpuLoadFlag);
  600. op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
  601. SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  602. auto ret = BuildTaskInfo(op_mapping_info);
  603. if (ret != SUCCESS) {
  604. GELOGE(ret, "[Build][TaskInfo] failed, ret:%u, path:%s", ret, dump_path.c_str());
  605. return ret;
  606. }
  607. SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info);
  608. SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);
  609. if (!op_list_.empty() || is_op_debug_ || is_end_graph_) {
  610. ret = ExecuteLoadDumpInfo(op_mapping_info);
  611. if (ret != SUCCESS) {
  612. GELOGE(ret, "[Execute][LoadDumpInfo] failed, ret:%u", ret);
  613. return ret;
  614. }
  615. }
  616. return SUCCESS;
  617. }
  618. Status DataDumper::BuildTaskInfo(toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  619. for (const auto &op_iter : op_list_) {
  620. auto op_desc = op_iter.op;
  621. GELOGD("Op %s in model begin to add task in op_mapping_info", op_desc->GetName().c_str());
  622. toolkit::aicpu::dump::Task task;
  623. task.set_end_graph(false);
  624. task.set_task_id(op_iter.task_id);
  625. task.set_stream_id(op_iter.stream_id);
  626. task.mutable_op()->set_op_name(op_desc->GetName());
  627. task.mutable_op()->set_op_type(op_desc->GetType());
  628. if (dump_properties_.GetDumpMode() == kDumpOutput) {
  629. Status ret = DumpOutput(op_iter, task);
  630. if (ret != SUCCESS) {
  631. GELOGE(ret, "[Dump][Output] failed, ret:%u, op:%s", ret, op_desc->GetName().c_str());
  632. return ret;
  633. }
  634. op_mapping_info.mutable_task()->Add(std::move(task));
  635. continue;
  636. }
  637. if (dump_properties_.GetDumpMode() == kDumpInput) {
  638. if (op_iter.is_task) {
  639. Status ret = DumpInput(op_iter, task);
  640. if (ret != SUCCESS) {
  641. GELOGE(ret, "[Dump][Input] failed, ret:%u, op:%s", ret, op_desc->GetName().c_str());
  642. return ret;
  643. }
  644. }
  645. op_mapping_info.mutable_task()->Add(std::move(task));
  646. continue;
  647. }
  648. if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) {
  649. auto ret = DumpOutput(op_iter, task);
  650. if (ret != SUCCESS) {
  651. GELOGE(ret, "[Dump][Output] failed when in dumping all, ret:%u, op:%s", ret, op_desc->GetName().c_str());
  652. return ret;
  653. }
  654. if (op_iter.is_task) {
  655. ret = DumpInput(op_iter, task);
  656. if (ret != SUCCESS) {
  657. GELOGE(ret, "[Dump][Input] failed when in dumping all, ret:%u, op:%s", ret, op_desc->GetName().c_str());
  658. return ret;
  659. }
  660. }
  661. op_mapping_info.mutable_task()->Add(std::move(task));
  662. continue;
  663. }
  664. }
  665. return SUCCESS;
  666. }
  667. void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
  668. toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  669. if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
  670. dump_properties_.GetDumpMode() == kDumpAll) {
  671. toolkit::aicpu::dump::Task task;
  672. task.set_end_graph(true);
  673. task.set_task_id(end_graph_task_id_);
  674. task.set_stream_id(end_graph_stream_id_);
  675. task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
  676. task.mutable_op()->set_op_type(ENDGRAPH);
  677. op_mapping_info.mutable_task()->Add(std::move(task));
  678. is_end_graph_ = true;
  679. if (op_mapping_info.model_name_param_case() == toolkit::aicpu::dump::OpMappingInfo::kModelName) {
  680. GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u",
  681. op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_);
  682. return;
  683. }
  684. GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
  685. }
  686. }
  687. void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
  688. toolkit::aicpu::dump::OpMappingInfo &op_mapping_info) {
  689. if (is_op_debug_) {
  690. GELOGI("add op_debug_info to aicpu, task_id is %u, stream_id is %u", task_id, stream_id);
  691. toolkit::aicpu::dump::Task task;
  692. task.set_end_graph(false);
  693. task.set_task_id(task_id);
  694. task.set_stream_id(stream_id);
  695. task.mutable_op()->set_op_name(NODE_NAME_OP_DEBUG);
  696. task.mutable_op()->set_op_type(OP_TYPE_OP_DEBUG);
  697. // set output
  698. toolkit::aicpu::dump::Output output;
  699. output.set_data_type(DT_UINT8);
  700. output.set_format(FORMAT_ND);
  701. output.mutable_shape()->add_dim(kOpDebugShape);
  702. output.set_original_name(NODE_NAME_OP_DEBUG);
  703. output.set_original_output_index(0);
  704. output.set_original_output_format(FORMAT_ND);
  705. output.set_original_output_data_type(DT_UINT8);
  706. // due to lhisi virtual addr bug, cannot use args now
  707. output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr)));
  708. output.set_size(kOpDebugSize);
  709. task.mutable_output()->Add(std::move(output));
  710. op_mapping_info.mutable_task()->Add(std::move(task));
  711. }
  712. }
  713. Status DataDumper::UnloadDumpInfo() {
  714. if (!load_flag_) {
  715. load_flag_ = false;
  716. return SUCCESS;
  717. }
  718. GELOGI("UnloadDumpInfo start.");
  719. toolkit::aicpu::dump::OpMappingInfo op_mapping_info;
  720. op_mapping_info.set_model_id(model_id_);
  721. op_mapping_info.set_flag(kAicpuUnloadFlag);
  722. for (const auto &op_iter : op_list_) {
  723. toolkit::aicpu::dump::Task task;
  724. task.set_task_id(op_iter.task_id);
  725. task.set_stream_id(op_iter.stream_id);
  726. op_mapping_info.mutable_task()->Add(std::move(task));
  727. }
  728. auto ret = ExecuteUnLoadDumpInfo(op_mapping_info);
  729. if (ret != SUCCESS) {
  730. GELOGE(ret, "[Execute][UnLoadDumpInfo] failed, ret:%d", ret);
  731. return ret;
  732. }
  733. return SUCCESS;
  734. }
  735. void DataDumper::DumpShrink() {
  736. compute_graph_.reset();
  737. input_map_.clear();
  738. ref_info_.clear();
  739. }
  740. void DataDumper::PrintCheckLog(string &dump_list_key) {
  741. std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
  742. if (model_list.empty()) {
  743. return;
  744. }
  745. bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
  746. bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
  747. dump_list_key = not_find_by_omname ? model_name_ : om_name_;
  748. GELOGI("%zu op need dump in known shape model %s.", op_list_.size(), dump_list_key.c_str());
  749. if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
  750. if (not_find_by_omname && not_find_by_modelname) {
  751. std::string model_list_str;
  752. for (auto &model : model_list) {
  753. model_list_str += "[" + model + "].";
  754. }
  755. GELOGW("Model %s will not be set to dump, dump list: %s", dump_list_key.c_str(), model_list_str.c_str());
  756. return;
  757. }
  758. }
  759. std::set<std::string> config_dump_op_list = dump_properties_.GetPropertyValue(dump_list_key);
  760. std::set<std::string> dump_op_list;
  761. for (auto &inner_dump_info : op_list_) {
  762. // oplist value OpDescPtr is not nullptr
  763. dump_op_list.insert(inner_dump_info.op->GetName());
  764. }
  765. for (auto &dump_op : config_dump_op_list) {
  766. if (dump_op_list.find(dump_op) == dump_op_list.end()) {
  767. GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str());
  768. }
  769. }
  770. }
  771. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示