You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

data_dumper.cc 41 kB

5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "graph/load/new_model_manager/data_dumper.h"
  17. #include <sys/time.h>
  18. #include <cstdlib>
  19. #include <ctime>
  20. #include <map>
  21. #include <utility>
  22. #include <vector>
  23. #include "common/debug/memory_dumper.h"
  24. #include "common/properties_manager.h"
  25. #include "common/util.h"
  26. #include "framework/common/debug/ge_log.h"
  27. #include "framework/common/util.h"
  28. #include "graph/anchor.h"
  29. #include "graph/debug/ge_attr_define.h"
  30. #include "graph/load/new_model_manager/model_utils.h"
  31. #include "graph/manager/util/debug.h"
  32. #include "graph/utils/attr_utils.h"
  33. #include "graph/utils/tensor_utils.h"
  34. #include "proto/dump_task.pb.h"
  35. #include "proto/ge_ir.pb.h"
  36. #include "proto/op_mapping_info.pb.h"
  37. #include "runtime/base.h"
  38. #include "runtime/mem.h"
  39. namespace {
  40. const uint32_t kAicpuLoadFlag = 1;
  41. const uint32_t kAicpuUnloadFlag = 0;
  42. const int64_t kOpDebugSize = 2048;
  43. const int64_t kOpDebugShape = 2048;
  44. const int8_t kDecimal = 10;
  45. const uint32_t kAddrLen = sizeof(void *);
  46. const char *const kDumpOutput = "output";
  47. const char *const kDumpInput = "input";
  48. const char *const kDumpAll = "all";
  49. // parse for format like nodename:input:index
  50. static bool ParseNameIndex(const std::string &node_name_index, std::string &node_name, std::string &input_or_output,
  51. size_t &index) {
  52. auto sep = node_name_index.rfind(':');
  53. if (sep == std::string::npos) {
  54. return false;
  55. }
  56. auto index_str = node_name_index.substr(sep + 1);
  57. index = static_cast<size_t>(std::strtol(index_str.c_str(), nullptr, kDecimal));
  58. auto node_name_without_index = node_name_index.substr(0, sep);
  59. sep = node_name_without_index.rfind(':');
  60. if (sep == std::string::npos) {
  61. return false;
  62. }
  63. node_name = node_name_without_index.substr(0, sep);
  64. input_or_output = node_name_without_index.substr(sep + 1);
  65. return !(input_or_output != kDumpInput && input_or_output != kDumpOutput);
  66. }
  67. static bool IsTensorDescWithSkipDumpAddrType(bool has_mem_type_attr, vector<int64_t> v_memory_type, size_t i) {
  68. return has_mem_type_attr && (v_memory_type[i] == RT_MEMORY_L1);
  69. }
  70. static uint64_t GetNowTime() {
  71. uint64_t ret = 0;
  72. struct timeval tv;
  73. if (gettimeofday(&tv, NULL) == 0) {
  74. ret = tv.tv_sec * 1000000ULL + tv.tv_usec;
  75. }
  76. return ret;
  77. }
  78. static void ReplaceStringElem(std::string &str) {
  79. for_each(str.begin(), str.end(), [](char &ch) {
  80. if ((ch == ' ') || (ch == '.') || (ch == '/') || (ch == '\\')) {
  81. ch = '_';
  82. }
  83. });
  84. }
  85. } // namespace
  86. static int32_t GetIrDataType(ge::DataType data_type) {
  87. static const std::map<ge::DataType, ge::proto::DataType> data_type_map = {
  88. {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
  89. {ge::DT_FLOAT, ge::proto::DT_FLOAT},
  90. {ge::DT_FLOAT16, ge::proto::DT_FLOAT16},
  91. {ge::DT_INT8, ge::proto::DT_INT8},
  92. {ge::DT_UINT8, ge::proto::DT_UINT8},
  93. {ge::DT_INT16, ge::proto::DT_INT16},
  94. {ge::DT_UINT16, ge::proto::DT_UINT16},
  95. {ge::DT_INT32, ge::proto::DT_INT32},
  96. {ge::DT_INT64, ge::proto::DT_INT64},
  97. {ge::DT_UINT32, ge::proto::DT_UINT32},
  98. {ge::DT_UINT64, ge::proto::DT_UINT64},
  99. {ge::DT_BOOL, ge::proto::DT_BOOL},
  100. {ge::DT_DOUBLE, ge::proto::DT_DOUBLE},
  101. {ge::DT_DUAL, ge::proto::DT_DUAL},
  102. {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8},
  103. {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8},
  104. {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64},
  105. {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128},
  106. {ge::DT_QINT8, ge::proto::DT_QINT8},
  107. {ge::DT_QINT16, ge::proto::DT_QINT16},
  108. {ge::DT_QINT32, ge::proto::DT_QINT32},
  109. {ge::DT_QUINT8, ge::proto::DT_QUINT8},
  110. {ge::DT_QUINT16, ge::proto::DT_QUINT16},
  111. {ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
  112. {ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
  113. {ge::DT_STRING, ge::proto::DT_STRING},
  114. };
  115. auto iter = data_type_map.find(data_type);
  116. if (iter == data_type_map.end()) {
  117. return static_cast<int32_t>(ge::proto::DT_UNDEFINED);
  118. }
  119. return static_cast<int32_t>(iter->second);
  120. }
  121. namespace ge {
  122. DataDumper::~DataDumper() {
  123. ReleaseDevMem(&dev_mem_load_);
  124. ReleaseDevMem(&dev_mem_unload_);
  125. }
  126. void DataDumper::ReleaseDevMem(void **ptr) noexcept {
  127. if (ptr == nullptr) {
  128. return;
  129. }
  130. if (*ptr != nullptr) {
  131. rtError_t rt_ret = rtFree(*ptr);
  132. if (rt_ret != RT_ERROR_NONE) {
  133. GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", rt_ret);
  134. }
  135. *ptr = nullptr;
  136. }
  137. }
  138. void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) {
  139. global_step_ = reinterpret_cast<uintptr_t>(global_step);
  140. loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter);
  141. loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
  142. }
  143. void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
  144. GELOGI("Start to save data %s message", node->GetName().c_str());
  145. if (node != nullptr) {
  146. auto input_op_desc = node->GetOpDesc();
  147. if (input_op_desc == nullptr) {
  148. GELOGE(PARAM_INVALID, "input op desc is null.");
  149. return;
  150. }
  151. for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
  152. for (auto &dst_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
  153. ge::NodePtr dst_node = dst_in_data_anchor->GetOwnerNode();
  154. auto op_desc = dst_node->GetOpDesc();
  155. if (op_desc == nullptr) {
  156. GELOGE(PARAM_INVALID, "input op desc is null.");
  157. return;
  158. }
  159. input_map_.insert(
  160. {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
  161. }
  162. }
  163. GELOGI("Save data message successfully");
  164. }
  165. }
  166. void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) {
  167. end_graph_task_id_ = task_id;
  168. end_graph_stream_id_ = stream_id;
  169. }
  170. void DataDumper::SaveOpDebugId(uint32_t task_id, uint32_t stream_id, void *op_debug_addr, bool is_op_debug) {
  171. op_debug_task_id_ = task_id;
  172. op_debug_stream_id_ = stream_id;
  173. op_debug_addr_ = op_debug_addr;
  174. is_op_debug_ = is_op_debug;
  175. }
  176. void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id,
  177. uint32_t stream_id) {
  178. GELOGD("Start SaveDumpOpInfo of task_id: %u, stream_id: %u", task_id, stream_id);
  179. OpDescInfo op_desc_info;
  180. op_desc_info.op_name = op->GetName();
  181. op_desc_info.op_type = op->GetType();
  182. op_desc_info.task_id = task_id;
  183. op_desc_info.stream_id = stream_id;
  184. for (size_t i = 0; i < op->GetAllInputsSize(); ++i) {
  185. GeTensorDescPtr input_tensor_desc = op->MutableInputDesc(i);
  186. if (input_tensor_desc == nullptr) {
  187. continue;
  188. }
  189. op_desc_info.input_format.emplace_back(input_tensor_desc->GetFormat());
  190. op_desc_info.input_shape.emplace_back(input_tensor_desc->GetShape().GetDims());
  191. op_desc_info.input_data_type.emplace_back(input_tensor_desc->GetDataType());
  192. int64_t input_size = 0;
  193. if (TensorUtils::GetTensorSizeInBytes(*input_tensor_desc, input_size) != SUCCESS) {
  194. GELOGW("Get input size failed");
  195. return;
  196. }
  197. GELOGI("Save dump op info, the input size is %ld", input_size);
  198. op_desc_info.input_size.emplace_back(input_size);
  199. }
  200. for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
  201. GeTensorDescPtr output_tensor_desc = op->MutableOutputDesc(j);
  202. if (output_tensor_desc == nullptr) {
  203. continue;
  204. }
  205. op_desc_info.output_format.emplace_back(output_tensor_desc->GetFormat());
  206. op_desc_info.output_shape.emplace_back(output_tensor_desc->GetShape().GetDims());
  207. op_desc_info.output_data_type.emplace_back(output_tensor_desc->GetDataType());
  208. int64_t output_size = 0;
  209. if (TensorUtils::GetTensorSizeInBytes(*output_tensor_desc, output_size) != SUCCESS) {
  210. GELOGW("Get input size failed");
  211. return;
  212. }
  213. GELOGI("Save dump op info, the output size is %ld", output_size);
  214. op_desc_info.output_size.emplace_back(output_size);
  215. }
  216. op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
  217. op_desc_info.output_addrs = ModelUtils::GetOutputDataAddrs(model_param, op);
  218. op_desc_info_.emplace_back(op_desc_info);
  219. }
  220. bool DataDumper::GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
  221. GELOGI("There are %zu op need to dump.", op_desc_info_.size());
  222. for (size_t index = 0; index < op_desc_info_.size(); ++index) {
  223. OpDescInfo dump_op_info = op_desc_info_.at(index);
  224. if (dump_op_info.task_id == task_id && dump_op_info.stream_id == stream_id) {
  225. GELOGI("find exception op of task_id: %u, stream_id: %u.", task_id, stream_id);
  226. op_desc_info = dump_op_info;
  227. return true;
  228. }
  229. }
  230. return false;
  231. }
  232. void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
  233. uintptr_t args) {
  234. if (op_desc == nullptr) {
  235. GELOGE(PARAM_INVALID, "Opdesc is nullptr");
  236. return;
  237. }
  238. GELOGI("Save dump task %s, task id: %u, stream id: %u", op_desc->GetName().c_str(), task_id, stream_id);
  239. op_list_.push_back({task_id, stream_id, op_desc, args, true});
  240. for (auto iter = input_map_.equal_range(op_desc->GetName()); iter.first != iter.second; ++iter.first) {
  241. InnerInputMapping &inner_input_mapping = iter.first->second;
  242. auto &data_op = inner_input_mapping.data_op;
  243. if (data_op == nullptr) {
  244. GELOGE(PARAM_INVALID, "data_op is null.");
  245. return;
  246. }
  247. auto input_tensor = op_desc->GetInputDescPtr(inner_input_mapping.input_anchor_index);
  248. if (input_tensor == nullptr) {
  249. GELOGE(PARAM_INVALID, "input_tensor is null, index: %d, size: %zu.", inner_input_mapping.input_anchor_index,
  250. op_desc->GetInputsSize());
  251. return;
  252. }
  253. int64_t data_size = 0;
  254. if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
  255. GELOGI("Get aipp data size according to attr is %ld", data_size);
  256. } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
  257. GELOGE(PARAM_INVALID, "Get input size filed");
  258. return;
  259. }
  260. GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id,
  261. stream_id, data_size);
  262. op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
  263. inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
  264. }
  265. }
  266. static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
  267. aicpu::dump::OpMappingInfo &op_mapping_info) {
  268. if (step_id != 0) {
  269. GELOGI("step_id exists.");
  270. op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
  271. } else {
  272. GELOGI("step_id is null.");
  273. }
  274. if (loop_per_iter != 0) {
  275. GELOGI("loop_per_iter exists.");
  276. op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
  277. } else {
  278. GELOGI("loop_per_iter is null.");
  279. }
  280. if (loop_cond != 0) {
  281. GELOGI("loop_cond exists.");
  282. op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
  283. } else {
  284. GELOGI("loop_cond is null.");
  285. }
  286. }
  287. Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
  288. const uintptr_t &addr, size_t index) {
  289. output.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  290. output.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  291. for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
  292. output.mutable_shape()->add_dim(dim);
  293. }
  294. int64_t output_size = 0;
  295. if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
  296. GELOGE(PARAM_INVALID, "Get output size filed");
  297. return PARAM_INVALID;
  298. }
  299. GELOGD("Get output size in dump is %ld", output_size);
  300. std::string origin_name;
  301. int32_t origin_output_index = -1;
  302. (void)AttrUtils::GetStr(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  303. (void)AttrUtils::GetInt(&tensor_descs.at(index), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  304. output.set_size(output_size);
  305. output.set_original_name(origin_name);
  306. output.set_original_output_index(origin_output_index);
  307. output.set_original_output_format(static_cast<int32_t>(tensor_descs.at(index).GetOriginFormat()));
  308. output.set_original_output_data_type(static_cast<int32_t>(tensor_descs.at(index).GetOriginDataType()));
  309. output.set_address(static_cast<uint64_t>(addr));
  310. return SUCCESS;
  311. }
// Fills @p output for the i-th output of inner_dump_info.op when that output's
// dump info is redirected (via ATTR_DATA_DUMP_REF) to another node's tensor.
// @param node_name_index  reference of the form "nodename:input|output:index".
// Returns PARAM_INVALID when the reference cannot be parsed, the target node
// is missing from compute_graph_, or no base address was saved in ref_info_.
Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Output &output,
                                 size_t i, const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // parser and find which node's input or output tensor desc is chosen for dump info
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "Op [%s] output desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
                                       " cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  // ref_info_ holds the redirect node's saved args base pointer.
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "Op [%s] output desc[%zu] cannot find any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    // Input slots come first in the args block: base + index * kAddrLen.
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_input_descs, addr, index), "Generate output failed");
  } else if (input_or_output == kDumpOutput) {
    // Output slots follow all input slots.
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateOutput(output, replace_output_descs, addr, index), "Generate output failed");
  }
  GELOGD("Op [%s] output desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}
// Builds dump Output entries for every output of a task-backed op (TBE/AICPU).
// Per output: redirected descs (ATTR_DATA_DUMP_REF) go through DumpRefOutput;
// L1-resident outputs get an OpBuffer record instead of an Output; everything
// else is addressed from the task args block (outputs follow all inputs).
// Returns PARAM_INVALID on desc/addr count mismatch or size-query failure.
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
  if (output_descs.size() != output_addrs.size()) {
    GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
           inner_dump_info.op->GetName().c_str(), output_descs.size());
    return PARAM_INVALID;
  }
  // Optional per-output memory-type list; when present it must match 1:1.
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != output_descs.size()),
                                       "DumpOutputWithTask[%s], output size[%zu], output memory type size[%zu]",
                                       inner_dump_info.op->GetName().c_str(), output_descs.size(),
                                       v_memory_type.size());
  for (size_t i = 0; i < output_descs.size(); ++i) {
    aicpu::dump::Output output;
    std::string node_name_index;
    const auto &output_desc = output_descs.at(i);
    // check dump output tensor desc is redirected by attr ATTR_DATA_DUMP_REF
    if (AttrUtils::GetStr(&output_desc, ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefOutput(inner_dump_info, output, i, node_name_index), "DumpRefOutput failed");
      task.mutable_output()->Add(std::move(output));
    } else {
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        // L1 output has no dumpable device address: record an OpBuffer only.
        GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i);
        int64_t output_size = 0;
        if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
          GELOGE(PARAM_INVALID, "Get output size failed.");
          return PARAM_INVALID;
        }
        GELOGI("Get output size of l1_fusion_dump is %ld", output_size);
        GenerateOpBuffer(output_size, task);
      } else {
        // Output slot i lives after all input slots in the args block.
        const auto input_size = inner_dump_info.op->GetInputsSize();
        auto addr = inner_dump_info.args + (i + input_size) * kAddrLen;
        GE_CHK_STATUS_RET(GenerateOutput(output, output_descs, addr, i), "Generate output failed");
        task.mutable_output()->Add(std::move(output));
      }
    }
  }
  return SUCCESS;
}
// Appends the dump Output entry for one InnerDumpInfo. Task-backed ops
// (inner_dump_info.is_task) delegate to DumpOutputWithTask; otherwise this
// handles a data/const/variable op registered via SaveDumpTask, using the
// anchor indices and data_size captured there.
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump output");
  if (inner_dump_info.is_task) {
    // tbe or aicpu op, these ops are with task
    return DumpOutputWithTask(inner_dump_info, task);
  }
  // else data, const or variable op
  aicpu::dump::Output output;
  auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
  if (output_tensor == nullptr) {
    GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
           inner_dump_info.op->GetOutputsSize());
    return PARAM_INVALID;
  }
  output.set_data_type(static_cast<int32_t>(GetIrDataType(output_tensor->GetDataType())));
  output.set_format(static_cast<int32_t>(output_tensor->GetFormat()));
  // Shape comes from the dims saved in SaveDumpTask, not the tensor desc.
  for (auto dim : inner_dump_info.dims) {
    output.mutable_shape()->add_dim(dim);
  }
  std::string origin_name;
  int32_t origin_output_index = -1;
  // Best-effort attrs; defaults ("" / -1) are recorded when absent.
  (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  output.set_size(inner_dump_info.data_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
  output.set_original_output_data_type(static_cast<int32_t>(output_tensor->GetOriginDataType()));
  // due to lhisi virtual addr bug, cannot use args now
  if (inner_dump_info.output_anchor_index >= static_cast<int>(output_addrs.size())) {
    GELOGE(FAILED, "Index is out of range.");
    return FAILED;
  }
  // NOTE(review): the address is computed from input_anchor_index (the
  // consumer's input slot within args) although the range check above uses
  // output_anchor_index — looks intentional for data ops dumped through the
  // consumer's task, but confirm against SaveDumpTask's registration.
  auto data_addr = inner_dump_info.args + kAddrLen * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
  output.set_address(static_cast<uint64_t>(data_addr));
  task.mutable_output()->Add(std::move(output));
  return SUCCESS;
}
  433. Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor<GeTensorDesc> &tensor_descs,
  434. const uintptr_t &addr, size_t index) {
  435. input.set_data_type(static_cast<int32_t>(GetIrDataType(tensor_descs.at(index).GetDataType())));
  436. input.set_format(static_cast<int32_t>(tensor_descs.at(index).GetFormat()));
  437. for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
  438. input.mutable_shape()->add_dim(dim);
  439. }
  440. int64_t input_size = 0;
  441. if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
  442. GELOGI("Get aipp input size according to attr is %ld", input_size);
  443. } else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) {
  444. GELOGE(PARAM_INVALID, "Get input size filed");
  445. return PARAM_INVALID;
  446. }
  447. GELOGD("Get input size in dump is %ld", input_size);
  448. input.set_size(input_size);
  449. input.set_address(static_cast<uint64_t>(addr));
  450. return SUCCESS;
  451. }
// Fills @p input for the i-th input of inner_dump_info.op when that input's
// dump info is redirected (via ATTR_DATA_DUMP_REF) to another node's tensor.
// @param node_name_index  reference of the form "nodename:input|output:index".
// Returns PARAM_INVALID when the reference cannot be parsed, the target node
// is missing from compute_graph_, or no base address was saved in ref_info_.
Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info, aicpu::dump::Input &input, size_t i,
                                const std::string &node_name_index) {
  std::string dump_op_name;
  std::string input_or_output;
  size_t index;
  // parser and find which node's input or output tensor desc is chosen for dump info
  if (!ParseNameIndex(node_name_index, dump_op_name, input_or_output, index)) {
    GELOGE(PARAM_INVALID, "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s].",
           inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str());
    return PARAM_INVALID;
  }
  GE_CHECK_NOTNULL(compute_graph_);
  auto replace_node = compute_graph_->FindNode(dump_op_name);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(replace_node == nullptr,
                                       "Op [%s] input desc[%zu] with invalid ATTR_DATA_DUMP_REF attr[%s],"
                                       " cannot find redirect node[%s].",
                                       inner_dump_info.op->GetName().c_str(), i, node_name_index.c_str(),
                                       dump_op_name.c_str());
  auto replace_opdesc = replace_node->GetOpDesc();
  GE_CHECK_NOTNULL(replace_opdesc);
  // ref_info_ holds the redirect node's saved args base pointer.
  auto iter = ref_info_.find(replace_opdesc);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(iter == ref_info_.end(),
                                       "Op [%s] input desc[%zu] cannot find any saved redirect node[%s]'s info.",
                                       inner_dump_info.op->GetName().c_str(), i, replace_opdesc->GetName().c_str());
  GE_CHECK_NOTNULL(iter->second);
  auto addr = reinterpret_cast<uintptr_t>(iter->second);
  if (input_or_output == kDumpInput) {
    // Input slots come first in the args block: base + index * kAddrLen.
    const auto &replace_input_descs = replace_opdesc->GetAllInputsDesc();
    addr += kAddrLen * index;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_input_descs, addr, index), "Generate input failed");
  } else if (input_or_output == kDumpOutput) {
    // Output slots follow all input slots.
    const auto &replace_output_descs = replace_opdesc->GetAllOutputsDesc();
    const auto replace_input_size = replace_opdesc->GetAllInputsDesc().size();
    addr += (index + replace_input_size) * kAddrLen;
    GE_CHK_STATUS_RET(GenerateInput(input, replace_output_descs, addr, index), "Generate input failed");
  }
  GELOGD("Op [%s] input desc[%zu] dump info is replaced by node[%s] [%s] tensor_desc [%zu]",
         inner_dump_info.op->GetName().c_str(), i, dump_op_name.c_str(), input_or_output.c_str(), index);
  return SUCCESS;
}
// Builds dump Input entries for every input of inner_dump_info.op.
// Per input: redirected descs (ATTR_DATA_DUMP_REF) go through DumpRefInput;
// L1-resident inputs get an OpBuffer record instead of an Input; everything
// else is addressed from the task args block (input slot i = args + i * kAddrLen).
// Returns PARAM_INVALID on desc/addr count mismatch or size-query failure.
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump input");
  const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op);
  if (input_descs.size() != input_addrs.size()) {
    GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
           inner_dump_info.op->GetName().c_str(), input_descs.size());
    return PARAM_INVALID;
  }
  // Optional per-input memory-type list; when present it must match 1:1.
  std::vector<int64_t> v_memory_type;
  bool has_mem_type_attr = ge::AttrUtils::GetListInt(inner_dump_info.op, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
  GE_RT_PARAM_INVALID_WITH_LOG_IF_TRUE(has_mem_type_attr && (v_memory_type.size() != input_descs.size()),
                                       "DumpInput[%s], input size[%zu], input memory type size[%zu]",
                                       inner_dump_info.op->GetName().c_str(), input_descs.size(), v_memory_type.size());
  for (size_t i = 0; i < input_descs.size(); ++i) {
    aicpu::dump::Input input;
    std::string node_name_index;
    // check dump input tensor desc is redirected by attr ATTR_DATA_DUMP_REF
    if (AttrUtils::GetStr(&input_descs.at(i), ATTR_DATA_DUMP_REF, node_name_index)) {
      GE_CHK_STATUS_RET(DumpRefInput(inner_dump_info, input, i, node_name_index), "DumpRefInput failed");
      task.mutable_input()->Add(std::move(input));
      // normal dump without attr
    } else {
      if (IsTensorDescWithSkipDumpAddrType(has_mem_type_attr, v_memory_type, i)) {
        // L1 input has no dumpable device address: record an OpBuffer only.
        GELOGI("[L1Fusion] DumpInput[%s] input[%zu] is l1 addr", inner_dump_info.op->GetName().c_str(), i);
        int64_t input_size = 0;
        if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
          GELOGI("Get aipp input size according to attr is %ld", input_size);
        } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
          GELOGE(PARAM_INVALID, "Get input size failed.");
          return PARAM_INVALID;
        }
        GELOGI("Get input size of l1_fusion_dump is %ld", input_size);
        GenerateOpBuffer(input_size, task);
      } else {
        auto addr = inner_dump_info.args + kAddrLen * i;
        GE_CHK_STATUS_RET(GenerateInput(input, input_descs, addr, i), "Generate input failed");
        task.mutable_input()->Add(std::move(input));
      }
    }
  }
  return SUCCESS;
}
  535. void DataDumper::GenerateOpBuffer(const int64_t &size, aicpu::dump::Task &task) {
  536. aicpu::dump::OpBuffer op_buffer;
  537. op_buffer.set_buffer_type(aicpu::dump::BufferType::L1);
  538. op_buffer.set_address(reinterpret_cast<uintptr_t>(l1_fusion_addr_));
  539. op_buffer.set_size(size);
  540. task.mutable_buffer()->Add(std::move(op_buffer));
  541. }
// Serializes @p op_mapping_info, copies it into freshly allocated device
// memory (dev_mem_load_) and hands it to the runtime via rtDatadumpInfoLoad.
// Sets load_flag_ on success. Any previously held dev_mem_load_ buffer is
// released first. Returns PARAM_INVALID on serialization failure, or the
// translated runtime error on rtMalloc/rtMemcpy/rtDatadumpInfoLoad failure.
Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
    return PARAM_INVALID;
  }
  if (dev_mem_load_ != nullptr) {
    // Re-entry: drop the previous buffer before allocating a new one.
    GELOGW("dev_mem_load_ has been used.");
    ReleaseDevMem(&dev_mem_load_);
  }
  rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size)
  rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
    return RT_ERROR_TO_GE_STATUS(rt_ret);
  }
  load_flag_ = true;
  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}
  574. Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  575. std::string proto_str;
  576. size_t proto_size = op_mapping_info.ByteSizeLong();
  577. bool ret = op_mapping_info.SerializeToString(&proto_str);
  578. if (!ret || proto_size == 0) {
  579. GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
  580. return PARAM_INVALID;
  581. }
  582. if (dev_mem_unload_ != nullptr) {
  583. GELOGW("dev_mem_unload_ has been used.");
  584. ReleaseDevMem(&dev_mem_unload_);
  585. }
  586. rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
  587. if (rt_ret != RT_ERROR_NONE) {
  588. GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
  589. return RT_ERROR_TO_GE_STATUS(rt_ret);
  590. }
  591. GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size)
  592. rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  593. if (rt_ret != RT_ERROR_NONE) {
  594. GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
  595. return RT_ERROR_TO_GE_STATUS(rt_ret);
  596. }
  597. rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
  598. if (rt_ret != RT_ERROR_NONE) {
  599. GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
  600. return RT_ERROR_TO_GE_STATUS(rt_ret);
  601. }
  602. load_flag_ = false;
  603. GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
  604. return SUCCESS;
  605. }
  606. Status DataDumper::LoadDumpInfo() {
  607. std::string dump_list_key;
  608. PrintCheckLog(dump_list_key);
  609. if (op_list_.empty()) {
  610. GELOGW("op_list_ is empty");
  611. }
  612. aicpu::dump::OpMappingInfo op_mapping_info;
  613. auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id_) + "/";
  614. op_mapping_info.set_dump_path(dump_path);
  615. op_mapping_info.set_model_name(dump_list_key);
  616. op_mapping_info.set_model_id(model_id_);
  617. op_mapping_info.set_flag(kAicpuLoadFlag);
  618. op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
  619. SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  620. GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info",
  621. dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str());
  622. auto ret = BuildTaskInfo(op_mapping_info);
  623. if (ret != SUCCESS) {
  624. GELOGE(ret, "Build task info failed");
  625. return ret;
  626. }
  627. SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info);
  628. SetOpDebugIdToAicpu(op_debug_task_id_, op_debug_stream_id_, op_debug_addr_, op_mapping_info);
  629. if (!op_list_.empty() || is_op_debug_ || is_end_graph_) {
  630. auto ret = ExecuteLoadDumpInfo(op_mapping_info);
  631. if (ret != SUCCESS) {
  632. GELOGE(ret, "Execute load dump info failed");
  633. return ret;
  634. }
  635. }
  636. return SUCCESS;
  637. }
  638. Status DataDumper::BuildTaskInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  639. for (const auto &op_iter : op_list_) {
  640. auto op_desc = op_iter.op;
  641. GELOGD("Op %s in model begin to add task in op_mapping_info", op_desc->GetName().c_str());
  642. aicpu::dump::Task task;
  643. task.set_end_graph(false);
  644. task.set_task_id(op_iter.task_id);
  645. task.set_stream_id(op_iter.stream_id);
  646. task.mutable_op()->set_op_name(op_desc->GetName());
  647. task.mutable_op()->set_op_type(op_desc->GetType());
  648. if (dump_properties_.GetDumpMode() == kDumpOutput) {
  649. Status ret = DumpOutput(op_iter, task);
  650. if (ret != SUCCESS) {
  651. GELOGE(ret, "Dump output failed");
  652. return ret;
  653. }
  654. op_mapping_info.mutable_task()->Add(std::move(task));
  655. continue;
  656. }
  657. if (dump_properties_.GetDumpMode() == kDumpInput) {
  658. if (op_iter.is_task) {
  659. Status ret = DumpInput(op_iter, task);
  660. if (ret != SUCCESS) {
  661. GELOGE(ret, "Dump input failed");
  662. return ret;
  663. }
  664. }
  665. op_mapping_info.mutable_task()->Add(std::move(task));
  666. continue;
  667. }
  668. if (dump_properties_.GetDumpMode() == kDumpAll || is_op_debug_) {
  669. auto ret = DumpOutput(op_iter, task);
  670. if (ret != SUCCESS) {
  671. GELOGE(ret, "Dump output failed when in dumping all");
  672. return ret;
  673. }
  674. if (op_iter.is_task) {
  675. ret = DumpInput(op_iter, task);
  676. if (ret != SUCCESS) {
  677. GELOGE(ret, "Dump input failed when in dumping all");
  678. return ret;
  679. }
  680. }
  681. op_mapping_info.mutable_task()->Add(std::move(task));
  682. continue;
  683. }
  684. }
  685. return SUCCESS;
  686. }
  687. void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
  688. aicpu::dump::OpMappingInfo &op_mapping_info) {
  689. if (dump_properties_.GetDumpMode() == kDumpOutput || dump_properties_.GetDumpMode() == kDumpInput ||
  690. dump_properties_.GetDumpMode() == kDumpAll) {
  691. aicpu::dump::Task task;
  692. task.set_end_graph(true);
  693. task.set_task_id(end_graph_task_id_);
  694. task.set_stream_id(end_graph_stream_id_);
  695. task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
  696. task.mutable_op()->set_op_type(ENDGRAPH);
  697. op_mapping_info.mutable_task()->Add(std::move(task));
  698. is_end_graph_ = true;
  699. if (op_mapping_info.model_name_param_case() == aicpu::dump::OpMappingInfo::kModelName) {
  700. GELOGI("Add end_graph_info to aicpu, model_name is %s, task_id is %u, stream_id is %u",
  701. op_mapping_info.model_name().c_str(), end_graph_task_id_, end_graph_stream_id_);
  702. return;
  703. }
  704. GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
  705. }
  706. }
  707. void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void *op_debug_addr,
  708. aicpu::dump::OpMappingInfo &op_mapping_info) {
  709. if (is_op_debug_) {
  710. GELOGI("add op_debug_info to aicpu, task_id is %u, stream_id is %u", task_id, stream_id);
  711. aicpu::dump::Task task;
  712. task.set_end_graph(false);
  713. task.set_task_id(task_id);
  714. task.set_stream_id(stream_id);
  715. task.mutable_op()->set_op_name(NODE_NAME_OP_DEBUG);
  716. task.mutable_op()->set_op_type(OP_TYPE_OP_DEBUG);
  717. // set output
  718. aicpu::dump::Output output;
  719. output.set_data_type(DT_UINT8);
  720. output.set_format(FORMAT_ND);
  721. output.mutable_shape()->add_dim(kOpDebugShape);
  722. output.set_original_name(NODE_NAME_OP_DEBUG);
  723. output.set_original_output_index(0);
  724. output.set_original_output_format(FORMAT_ND);
  725. output.set_original_output_data_type(DT_UINT8);
  726. // due to lhisi virtual addr bug, cannot use args now
  727. output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_addr)));
  728. output.set_size(kOpDebugSize);
  729. task.mutable_output()->Add(std::move(output));
  730. op_mapping_info.mutable_task()->Add(std::move(task));
  731. }
  732. }
  733. Status DataDumper::UnloadDumpInfo() {
  734. if (!load_flag_) {
  735. GELOGI("No need to UnloadDumpInfo.");
  736. load_flag_ = false;
  737. return SUCCESS;
  738. }
  739. GELOGI("UnloadDumpInfo start.");
  740. aicpu::dump::OpMappingInfo op_mapping_info;
  741. op_mapping_info.set_model_id(model_id_);
  742. op_mapping_info.set_flag(kAicpuUnloadFlag);
  743. for (const auto &op_iter : op_list_) {
  744. aicpu::dump::Task task;
  745. task.set_task_id(op_iter.task_id);
  746. op_mapping_info.mutable_task()->Add(std::move(task));
  747. }
  748. auto ret = ExecuteUnLoadDumpInfo(op_mapping_info);
  749. if (ret != SUCCESS) {
  750. GELOGE(ret, "Execute unload dump info failed");
  751. return ret;
  752. }
  753. return SUCCESS;
  754. }
  755. void DataDumper::PrintCheckLog(string &dump_list_key) {
  756. std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
  757. if (model_list.empty()) {
  758. GELOGI("No model need dump.");
  759. return;
  760. }
  761. bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
  762. bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
  763. dump_list_key = not_find_by_omname ? model_name_ : om_name_;
  764. GELOGI("%zu op need dump in known shape model %s.", op_list_.size(), dump_list_key.c_str());
  765. if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
  766. if (not_find_by_omname && not_find_by_modelname) {
  767. std::string model_list_str;
  768. for (auto &model : model_list) {
  769. model_list_str += "[" + model + "].";
  770. }
  771. GELOGW("Model %s will not be set to dump, dump list: %s", dump_list_key.c_str(), model_list_str.c_str());
  772. return;
  773. }
  774. }
  775. std::set<std::string> config_dump_op_list = dump_properties_.GetPropertyValue(dump_list_key);
  776. std::set<std::string> dump_op_list;
  777. for (auto &inner_dump_info : op_list_) {
  778. // oplist value OpDescPtr is not nullptr
  779. dump_op_list.insert(inner_dump_info.op->GetName());
  780. }
  781. for (auto &dump_op : config_dump_op_list) {
  782. if (dump_op_list.find(dump_op) == dump_op_list.end()) {
  783. GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str());
  784. }
  785. }
  786. }
  787. Status DataDumper::DumpExceptionInput(const OpDescInfo &op_desc_info, const string &dump_file) {
  788. GELOGI("Start to dump exception input");
  789. for (size_t i = 0; i < op_desc_info.input_addrs.size(); i++) {
  790. if (Debug::DumpDevMem(dump_file.data(), op_desc_info.input_addrs.at(i), op_desc_info.input_size.at(i)) != SUCCESS) {
  791. GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
  792. return PARAM_INVALID;
  793. }
  794. }
  795. return SUCCESS;
  796. }
  797. Status DataDumper::DumpExceptionOutput(const OpDescInfo &op_desc_info, const string &dump_file) {
  798. GELOGI("Start to dump exception output");
  799. for (size_t i = 0; i < op_desc_info.output_addrs.size(); i++) {
  800. if (Debug::DumpDevMem(dump_file.data(), op_desc_info.output_addrs.at(i), op_desc_info.output_size.at(i)) !=
  801. SUCCESS) {
  802. GELOGE(PARAM_INVALID, "Dump the %zu input data failed", i);
  803. return PARAM_INVALID;
  804. }
  805. }
  806. return SUCCESS;
  807. }
  808. Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exception_infos) {
  809. GELOGI("Start to dump exception info");
  810. for (const rtExceptionInfo &iter : exception_infos) {
  811. OpDescInfo op_desc_info;
  812. if (GetOpDescInfo(iter.streamid, iter.taskid, op_desc_info)) {
  813. toolkit::dumpdata::DumpData dump_data;
  814. dump_data.set_version("2.0");
  815. dump_data.set_dump_time(GetNowTime());
  816. for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
  817. toolkit::dumpdata::OpInput input;
  818. input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));
  819. input.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.input_format[i]));
  820. for (auto dim : op_desc_info.input_shape[i]) {
  821. input.mutable_shape()->add_dim(dim);
  822. }
  823. input.set_size(op_desc_info.input_size[i]);
  824. GELOGI("The input size int exception is %ld", op_desc_info.input_size[i]);
  825. dump_data.mutable_input()->Add(std::move(input));
  826. }
  827. for (size_t j = 0; j < op_desc_info.output_format.size(); ++j) {
  828. toolkit::dumpdata::OpOutput output;
  829. output.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.output_data_type[j])));
  830. output.set_format(toolkit::dumpdata::OutputFormat(op_desc_info.output_format[j]));
  831. for (auto dim : op_desc_info.output_shape[j]) {
  832. output.mutable_shape()->add_dim(dim);
  833. }
  834. output.set_size(op_desc_info.output_size[j]);
  835. GELOGI("The output size int exception is %ld", op_desc_info.output_size[j]);
  836. dump_data.mutable_output()->Add(std::move(output));
  837. }
  838. uint64_t now_time = GetNowTime();
  839. std::string op_name = op_desc_info.op_name;
  840. std::string op_type = op_desc_info.op_type;
  841. ReplaceStringElem(op_name);
  842. ReplaceStringElem(op_type);
  843. string dump_file_path =
  844. "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time);
  845. GELOGI("The exception dump file path is %s", dump_file_path.c_str());
  846. uint64_t proto_size = dump_data.ByteSizeLong();
  847. unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
  848. bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
  849. if (!ret || proto_size == 0) {
  850. GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
  851. return PARAM_INVALID;
  852. }
  853. GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), &proto_size, sizeof(uint64_t)),
  854. "Failed to dump proto size");
  855. GE_CHK_STATUS_RET(MemoryDumper::DumpToFile(dump_file_path.c_str(), proto_msg.get(), proto_size),
  856. "Failed to dump proto msg");
  857. if (DumpExceptionInput(op_desc_info, dump_file_path) != SUCCESS) {
  858. GELOGE(PARAM_INVALID, "Dump exception input failed");
  859. return PARAM_INVALID;
  860. }
  861. if (DumpExceptionOutput(op_desc_info, dump_file_path) != SUCCESS) {
  862. GELOGE(PARAM_INVALID, "Dump exception output failed");
  863. return PARAM_INVALID;
  864. }
  865. GELOGI("Dump exception info SUCCESS");
  866. } else {
  867. GELOGE(PARAM_INVALID, "Get op desc info failed,task id:%u,stream id:%u", iter.taskid, iter.streamid);
  868. return PARAM_INVALID;
  869. }
  870. }
  871. return SUCCESS;
  872. }
  873. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示