
data_dumper.cc 21 kB

/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/load/new_model_manager/data_dumper.h"

#include <ctime>
#include <map>
#include <utility>
#include <vector>

#include "common/debug/log.h"
#include "common/properties_manager.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/anchor.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"
#include "proto/ge_ir.pb.h"
#include "proto/op_mapping_info.pb.h"
#include "runtime/mem.h"

namespace {
const uint32_t kAicpuLoadFlag = 1;
const uint32_t kAicpuUnloadFlag = 0;
const char *const kDumpOutput = "output";
const char *const kDumpInput = "input";
const char *const kDumpAll = "all";
}  // namespace
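// Maps a ge::DataType value to the matching ge::proto::DataType; values missing
// from the table fall back to DT_UNDEFINED.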
static int32_t GetIrDataType(ge::DataType data_type) {
  static const std::map<ge::DataType, ge::proto::DataType> data_type_map = {
    {ge::DT_UNDEFINED, ge::proto::DT_UNDEFINED},
    {ge::DT_FLOAT, ge::proto::DT_FLOAT},
    {ge::DT_FLOAT16, ge::proto::DT_FLOAT16},
    {ge::DT_INT8, ge::proto::DT_INT8},
    {ge::DT_UINT8, ge::proto::DT_UINT8},
    {ge::DT_INT16, ge::proto::DT_INT16},
    {ge::DT_UINT16, ge::proto::DT_UINT16},
    {ge::DT_INT32, ge::proto::DT_INT32},
    {ge::DT_INT64, ge::proto::DT_INT64},
    {ge::DT_UINT32, ge::proto::DT_UINT32},
    {ge::DT_UINT64, ge::proto::DT_UINT64},
    {ge::DT_BOOL, ge::proto::DT_BOOL},
    {ge::DT_DOUBLE, ge::proto::DT_DOUBLE},
    {ge::DT_DUAL, ge::proto::DT_DUAL},
    {ge::DT_DUAL_SUB_INT8, ge::proto::DT_DUAL_SUB_INT8},
    {ge::DT_DUAL_SUB_UINT8, ge::proto::DT_DUAL_SUB_UINT8},
    {ge::DT_COMPLEX64, ge::proto::DT_COMPLEX64},
    {ge::DT_COMPLEX128, ge::proto::DT_COMPLEX128},
    {ge::DT_QINT8, ge::proto::DT_QINT8},
    {ge::DT_QINT16, ge::proto::DT_QINT16},
    {ge::DT_QINT32, ge::proto::DT_QINT32},
    {ge::DT_QUINT8, ge::proto::DT_QUINT8},
    {ge::DT_QUINT16, ge::proto::DT_QUINT16},
    {ge::DT_RESOURCE, ge::proto::DT_RESOURCE},
    {ge::DT_STRING_REF, ge::proto::DT_STRING_REF},
    {ge::DT_STRING, ge::proto::DT_STRING},
  };

  auto iter = data_type_map.find(data_type);
  if (iter == data_type_map.end()) {
    return static_cast<int32_t>(ge::proto::DT_UNDEFINED);
  }
  return static_cast<int32_t>(iter->second);
}
namespace ge {
DataDumper::~DataDumper() {
  ReleaseDevMem(&dev_mem_load_);
  ReleaseDevMem(&dev_mem_unload_);
}
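// Frees device memory previously allocated with rtMalloc and resets the pointer;
// a null pointer argument is tolerated.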
void DataDumper::ReleaseDevMem(void **ptr) noexcept {
  if (ptr == nullptr) {
    return;
  }

  if (*ptr != nullptr) {
    rtError_t rt_ret = rtFree(*ptr);
    if (rt_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rtFree failed, ret: 0x%X", rt_ret);
    }
    *ptr = nullptr;
  }
}
void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond) {
  global_step_ = reinterpret_cast<uintptr_t>(global_step);
  loop_per_iter_ = reinterpret_cast<uintptr_t>(loop_per_iter);
  loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
}
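// Records, for every consumer of the given node's data outputs, a mapping from the
// consumer op name to this node's OpDesc and the anchor indices involved, so that
// SaveDumpTask can later dump the corresponding input data as well.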
void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
  if (node != nullptr) {
    GELOGI("Start to save data %s message", node->GetName().c_str());
    auto input_op_desc = node->GetOpDesc();
    if (input_op_desc == nullptr) {
      GELOGE(PARAM_INVALID, "input op desc is null.");
      return;
    }

    for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
      for (auto &dst_in_data_anchor : out_data_anchor->GetPeerInDataAnchors()) {
        ge::NodePtr dst_node = dst_in_data_anchor->GetOwnerNode();
        auto op_desc = dst_node->GetOpDesc();
        if (op_desc == nullptr) {
          GELOGE(PARAM_INVALID, "input op desc is null.");
          return;
        }

        input_map_.insert(
          {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
      }
    }
    GELOGI("Save data message successfully");
  }
}
void DataDumper::SaveEndGraphId(uint32_t task_id, uint32_t stream_id) {
  end_graph_task_id_ = task_id;
  end_graph_stream_id_ = stream_id;
}
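// Registers a dump task for the given op. If the op consumes a Data node recorded
// by SaveDumpInput, an extra entry is appended so the corresponding input tensor
// (sized via ATTR_NAME_INPUT_ORIGIN_SIZE or the tensor byte size) is dumped too.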
  124. void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc,
  125. uintptr_t args) {
  126. if (op_desc == nullptr) {
  127. GELOGE(PARAM_INVALID, "Opdesc is nullptr");
  128. return;
  129. }
  130. GELOGI("Save dump task %s, task id: %u, stream id: %u", op_desc->GetName().c_str(), task_id, stream_id);
  131. op_list_.push_back({task_id, stream_id, op_desc, args, true});
  132. for (auto iter = input_map_.equal_range(op_desc->GetName()); iter.first != iter.second; ++iter.first) {
  133. InnerInputMapping &inner_input_mapping = iter.first->second;
  134. auto &data_op = inner_input_mapping.data_op;
  135. if (data_op == nullptr) {
  136. GELOGE(PARAM_INVALID, "data_op is null.");
  137. return;
  138. }
  139. auto input_tensor = op_desc->GetInputDescPtr(inner_input_mapping.input_anchor_index);
  140. if (input_tensor == nullptr) {
  141. GELOGE(PARAM_INVALID, "input_tensor is null, index: %d, size: %zu.", inner_input_mapping.input_anchor_index,
  142. op_desc->GetInputsSize());
  143. return;
  144. }
  145. int64_t data_size = 0;
  146. if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
  147. GELOGI("Get aipp data size according to attr is %ld", data_size);
  148. } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
  149. GELOGE(PARAM_INVALID, "Get input size filed");
  150. return;
  151. }
  152. GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id,
  153. stream_id, data_size);
  154. op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
  155. inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
  156. }
  157. }
static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
                                 aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (step_id != 0) {
    GELOGI("step_id exists.");
    op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
  } else {
    GELOGI("step_id is null.");
  }

  if (loop_per_iter != 0) {
    GELOGI("loop_per_iter exists.");
    op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
  } else {
    GELOGI("loop_per_iter is null.");
  }

  if (loop_cond != 0) {
    GELOGI("loop_cond exists.");
    op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
  } else {
    GELOGI("loop_cond is null.");
  }
}
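// Fills the task's output descriptions (data type, format, shape, size and device
// address). For regular task ops every output desc is dumped; for data/const/variable
// ops only the output anchor recorded in the InnerDumpInfo is dumped.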
Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump output");
  if (inner_dump_info.is_task) {
    // tbe or aicpu op
    const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
    const auto input_size = inner_dump_info.op->GetAllInputsDesc().size();
    const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false);
    if (output_descs.size() != output_addrs.size()) {
      GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
             inner_dump_info.op->GetName().c_str(), output_descs.size());
      return PARAM_INVALID;
    }

    for (size_t i = 0; i < output_descs.size(); ++i) {
      aicpu::dump::Output output;
      output.set_data_type(static_cast<int32_t>(GetIrDataType(output_descs.at(i).GetDataType())));
      output.set_format(static_cast<int32_t>(output_descs.at(i).GetFormat()));
      for (auto dim : output_descs.at(i).GetShape().GetDims()) {
        output.mutable_shape()->add_dim(dim);
      }

      int64_t output_size = 0;
      if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
        GELOGE(PARAM_INVALID, "Get output size failed");
        return PARAM_INVALID;
      }
      GELOGI("Get output size in dump is %ld", output_size);

      std::string origin_name;
      int32_t origin_output_index = -1;
      (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
      (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
      GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size);
                      return PARAM_INVALID)
      output.set_size(output_size);
      output.set_original_name(origin_name);
      output.set_original_output_index(origin_output_index);
      output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat()));
      output.set_original_output_data_type(static_cast<int32_t>(output_descs.at(i).GetOriginDataType()));
      output.set_address(static_cast<uint64_t>(inner_dump_info.args + (i + input_size) * sizeof(void *)));

      task.mutable_output()->Add(std::move(output));
    }
    return SUCCESS;
  }

  // else data, const or variable op
  aicpu::dump::Output output;
  auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
  const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op, false);
  if (output_tensor == nullptr) {
    GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
           inner_dump_info.op->GetOutputsSize());
    return PARAM_INVALID;
  }

  output.set_data_type(static_cast<int32_t>(GetIrDataType(output_tensor->GetDataType())));
  output.set_format(static_cast<int32_t>(output_tensor->GetFormat()));
  for (auto dim : inner_dump_info.dims) {
    output.mutable_shape()->add_dim(dim);
  }

  std::string origin_name;
  int32_t origin_output_index = -1;
  (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
  (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
  GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0,
                  GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size);
                  return PARAM_INVALID)
  output.set_size(inner_dump_info.data_size);
  output.set_original_name(origin_name);
  output.set_original_output_index(origin_output_index);
  output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
  output.set_original_output_data_type(static_cast<int32_t>(output_tensor->GetOriginDataType()));

  // due to lhisi virtual addr bug, cannot use args now
  if (inner_dump_info.output_anchor_index >= static_cast<int>(output_addrs.size())) {
    GELOGE(FAILED, "Index is out of range.");
    return FAILED;
  }
  auto data_addr = inner_dump_info.args + sizeof(void *) * static_cast<uint32_t>(inner_dump_info.input_anchor_index);
  output.set_address(static_cast<uint64_t>(data_addr));

  task.mutable_output()->Add(std::move(output));
  return SUCCESS;
}
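// Fills the task's input descriptions from the op's input tensor descs; the input
// size comes from ATTR_NAME_INPUT_ORIGIN_SIZE when present (AIPP case), otherwise
// from the tensor size in bytes.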
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
  GELOGI("Start dump input");
  const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
  const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op, false);
  if (input_descs.size() != input_addrs.size()) {
    GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
           inner_dump_info.op->GetName().c_str(), input_descs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < input_descs.size(); ++i) {
    aicpu::dump::Input input;
    input.set_data_type(static_cast<int32_t>(GetIrDataType(input_descs.at(i).GetDataType())));
    input.set_format(static_cast<int32_t>(input_descs.at(i).GetFormat()));
    for (auto dim : input_descs.at(i).GetShape().GetDims()) {
      input.mutable_shape()->add_dim(dim);
    }

    int64_t input_size = 0;
    if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
      GELOGI("Get aipp input size according to attr is %ld", input_size);
    } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
      GELOGE(PARAM_INVALID, "Get input size failed");
      return PARAM_INVALID;
    }
    GELOGI("Get input size in dump is %ld", input_size);
    GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size);
                    return PARAM_INVALID;)
    input.set_size(input_size);
    input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i));
    task.mutable_input()->Add(std::move(input));
  }
  return SUCCESS;
}
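// Serializes the OpMappingInfo proto, copies it into device memory and loads it via
// rtDatadumpInfoLoad; sets load_flag_ on success.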
Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
    return FAILED;
  }

  if (dev_mem_load_ != nullptr) {
    GELOGW("dev_mem_load_ has been used.");
    ReleaseDevMem(&dev_mem_load_);
  }

  rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "load dump information.", proto_size)

  rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }

  rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }

  load_flag_ = true;
  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}
Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_info) {
  std::string proto_str;
  size_t proto_size = op_mapping_info.ByteSizeLong();
  bool ret = op_mapping_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    GELOGE(FAILED, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
    return FAILED;
  }

  if (dev_mem_unload_ != nullptr) {
    GELOGW("dev_mem_unload_ has been used.");
    ReleaseDevMem(&dev_mem_unload_);
  }

  rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }
  GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "unload dump information.", proto_size)

  rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }

  rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
    return RT_FAILED;
  }

  load_flag_ = false;
  GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
  return SUCCESS;
}
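// Builds the OpMappingInfo for this model (dump path, model name/id, dump step, loop
// addresses, and one task per recorded op according to the configured dump mode) and
// loads it to the device.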
Status DataDumper::LoadDumpInfo() {
  std::string dump_list_key;
  PrintCheckLog(dump_list_key);

  if (op_list_.empty()) {
    return SUCCESS;
  }

  aicpu::dump::OpMappingInfo op_mapping_info;

  auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
  op_mapping_info.set_dump_path(dump_path + std::to_string(device_id_) + "/");
  op_mapping_info.set_model_name(dump_list_key);
  op_mapping_info.set_model_id(model_id_);
  op_mapping_info.set_flag(kAicpuLoadFlag);
  op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep());
  SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
  GELOGI("Dump step is %s and dump path is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(),
         dump_path.c_str());

  for (const auto &op_iter : op_list_) {
    aicpu::dump::Task task;
    auto op_desc = op_iter.op;
    task.set_end_graph(false);
    task.set_task_id(op_iter.task_id);
    task.set_stream_id(op_iter.stream_id);
    task.mutable_op()->set_op_name(op_desc->GetName());
    task.mutable_op()->set_op_type(op_desc->GetType());

    if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput) {
      if (DumpOutput(op_iter, task) != SUCCESS) {
        GELOGE(FAILED, "Dump output failed");
        return FAILED;
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
    if (PropertiesManager::Instance().GetDumpMode() == kDumpInput) {
      if (op_iter.is_task) {
        if (DumpInput(op_iter, task) != SUCCESS) {
          GELOGE(FAILED, "Dump input failed");
          return FAILED;
        }
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
    if (PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
      auto ret = DumpOutput(op_iter, task);
      if (ret != SUCCESS) {
        GELOGE(FAILED, "Dump output failed when in dumping all");
        return FAILED;
      }
      if (op_iter.is_task) {
        ret = DumpInput(op_iter, task);
        if (ret != SUCCESS) {
          GELOGE(FAILED, "Dump input failed when in dumping all");
          return FAILED;
        }
      }
      op_mapping_info.mutable_task()->Add(std::move(task));
      continue;
    }
  }

  SetEndGraphIdToAicpu(end_graph_task_id_, end_graph_stream_id_, op_mapping_info);

  auto ret = ExecuteLoadDumpInfo(op_mapping_info);
  if (ret != SUCCESS) {
    GELOGE(FAILED, "Execute load dump info failed");
    return FAILED;
  }
  return SUCCESS;
}
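// When any dump mode is configured, appends an end-of-graph task (NODE_NAME_END_GRAPH)
// so the AICPU side knows when the graph has finished executing.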
void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
                                      aicpu::dump::OpMappingInfo &op_mapping_info) {
  if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput ||
      PropertiesManager::Instance().GetDumpMode() == kDumpInput ||
      PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
    aicpu::dump::Task task;
    task.set_end_graph(true);
    task.set_task_id(end_graph_task_id_);
    task.set_stream_id(end_graph_stream_id_);
    task.mutable_op()->set_op_name(NODE_NAME_END_GRAPH);
    task.mutable_op()->set_op_type(ENDGRAPH);
    op_mapping_info.mutable_task()->Add(std::move(task));
  }
}
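// Builds an unload OpMappingInfo containing the recorded task ids and sends it to the
// device; does nothing if no dump info was loaded.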
Status DataDumper::UnloadDumpInfo() {
  if (!load_flag_) {
    GELOGI("No need to UnloadDumpInfo.");
    load_flag_ = false;
    return SUCCESS;
  }

  GELOGI("UnloadDumpInfo start.");
  aicpu::dump::OpMappingInfo op_mapping_info;
  op_mapping_info.set_model_id(model_id_);
  op_mapping_info.set_flag(kAicpuUnloadFlag);

  for (const auto &op_iter : op_list_) {
    aicpu::dump::Task task;
    task.set_task_id(op_iter.task_id);
    op_mapping_info.mutable_task()->Add(std::move(task));
  }

  auto ret = ExecuteUnLoadDumpInfo(op_mapping_info);
  if (ret != SUCCESS) {
    GELOGE(FAILED, "Execute unload dump info failed");
    return FAILED;
  }
  return SUCCESS;
}
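// Checks the dump configuration against this model: resolves which key (om name or
// model name) the dump list uses, and warns about configured ops that do not exist
// in the model.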
void DataDumper::PrintCheckLog(string &dump_list_key) {
  std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel();
  if (model_list.empty()) {
    GELOGI("No model need dump.");
    return;
  }

  GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str());

  bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
  bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
    if (not_find_by_omname && not_find_by_modelname) {
      std::string model_list_str;
      for (auto &model : model_list) {
        model_list_str += "[" + model + "].";
      }
      GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
      return;
    }
  }

  dump_list_key = not_find_by_omname ? model_name_ : om_name_;
  std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key);
  std::set<std::string> dump_op_list;
  for (auto &inner_dump_info : op_list_) {
    // oplist value OpDescPtr is not nullptr
    dump_op_list.insert(inner_dump_info.op->GetName());
  }

  for (auto &dump_op : config_dump_op_list) {
    if (dump_op_list.find(dump_op) == dump_op_list.end()) {
      GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str());
    }
  }
}
}  // namespace ge
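For orientation, the sketch below shows a plausible call sequence for DataDumper based only on the methods defined in this file. The helper name DumpLifecycleSketch and the way the dumper, data_node, op_desc, and args arguments are obtained are assumptions for illustration; construction and member setup of DataDumper happen elsewhere in the model loader and are not part of data_dumper.cc.

#include <memory>
#include "graph/load/new_model_manager/data_dumper.h"

// Hypothetical helper: demonstrates the expected ordering of DataDumper calls
// during model load, execution and teardown. All inputs are assumed to be
// provided by the surrounding model-loading code.
ge::Status DumpLifecycleSketch(ge::DataDumper &dumper, const std::shared_ptr<ge::Node> &data_node,
                               const std::shared_ptr<ge::OpDesc> &op_desc, uintptr_t args) {
  // Record loop-control device addresses; zero/null is tolerated (see SetOpMappingLoopAddr).
  dumper.SetLoopAddr(nullptr, nullptr, nullptr);
  // Remember Data nodes whose outputs feed ops that will be dumped.
  dumper.SaveDumpInput(data_node);
  // Register one dump task per executed task; real task/stream ids come from task generation.
  dumper.SaveDumpTask(/*task_id=*/0U, /*stream_id=*/0U, op_desc, args);
  // Serialize the OpMappingInfo and hand it to the runtime before execution.
  if (dumper.LoadDumpInfo() != ge::SUCCESS) {
    return ge::FAILED;
  }
  // ... the model executes and the device side produces the dumps ...
  // Tear down: send the unload message for the recorded task ids.
  return dumper.UnloadDumpInfo();
}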

The Graph Engine (GE) module is a submodule of MindSpore, implemented in C++. It sits between the front-end module ME and the underlying hardware and acts as the bridge between them. GE takes the graph delivered by ME as input, performs a series of deep graph optimizations, and finally produces a graph that can run efficiently on the underlying hardware. GE is optimized specifically for the hardware architecture of the Ascend AI processor in order to fully exploit its compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists mainly of two parts, GE API and GE Core; the detailed architecture diagram is shown below.