You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

hybrid_model_async_executor.cc 23 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "hybrid/executor/hybrid_model_async_executor.h"
  17. #include "graph/load/model_manager/model_utils.h"
  18. #include "graph/utils/tensor_utils.h"
  19. #include "graph/utils/type_utils.h"
  20. #include "graph/ge_context.h"
  21. #include "omm/csa_interact.h"
  22. namespace ge {
  23. namespace hybrid {
  namespace {
  // Output index used when fetching the output descriptor of an input node (see InitInputDesc()).
  constexpr int kDataOutputIndex = 0;
  // A pipeline executor is created in Init() only when the root graph has at least this many groups.
  constexpr size_t kMinimumPiplineStages = 2;
  // Passed as the numeric base to strtol() when parsing the ITER_NUM environment variable in RunInternal().
  constexpr int kDefaultLoopCount = 10;
  }  // namespace
  29. HybridModelAsyncExecutor::HybridModelAsyncExecutor(HybridModel *model)
  30. : model_(model), run_flag_(false), data_dumper_(nullptr) {
  31. }
  32. HybridModelAsyncExecutor::~HybridModelAsyncExecutor() {
  33. if (stream_ != nullptr) {
  34. GE_CHK_RT(rtStreamDestroy(stream_));
  35. }
  36. }
  37. void HybridModelAsyncExecutor::SetDeviceId(uint32_t device_id) {
  38. device_id_ = device_id;
  39. }
  40. void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {
  41. model_id_ = model_id;
  42. }
  43. void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
  44. om_name_ = model_name;
  45. }
  46. Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
  47. GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
  48. "Data queue is full, please call again later, model_id %u ", model_id_);
  49. GELOGD("EnqueueData successfully. model_id = %u, data_index = %u", data->GetInput().model_id, data->GetInput().index);
  50. return SUCCESS;
  51. }
  52. Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &listener) {
  53. GELOGD("HybridModelExecutor::Start IN, has listener = %d", listener != nullptr);
  54. std::lock_guard<std::mutex> lk(mu_);
  55. GE_CHK_BOOL_RET_STATUS(!run_flag_, INTERNAL_ERROR, "Model already started.");
  56. run_flag_ = true;
  57. listener_ = listener;
  58. future_ = std::async(std::launch::async, [&]() -> Status {
  59. GetThreadLocalContext() = *executor_->GetContext()->ge_context;
  60. GetContext().SetSessionId(executor_->GetContext()->session_id);
  61. return RunInternal();
  62. });
  63. GE_CHK_BOOL_RET_STATUS(future_.valid(), INTERNAL_ERROR, "Failed to start.");
  64. GELOGD("HybridModelExecutor::Start successfully");
  65. return SUCCESS;
  66. }
  67. Status HybridModelAsyncExecutor::Stop() {
  68. std::lock_guard<std::mutex> lk(mu_);
  69. run_flag_ = false;
  70. data_inputer_->Stop();
  71. Status ret = SUCCESS;
  72. if (future_.valid()) {
  73. ret = future_.get();
  74. }
  75. if (is_op_debug_reg_) {
  76. op_debug_register_.UnregisterDebugForStream(stream_);
  77. }
  78. if (stream_ != nullptr) {
  79. GE_CHK_RT(rtStreamDestroy(stream_));
  80. stream_ = nullptr;
  81. }
  82. return ret;
  83. }
  84. Status HybridModelAsyncExecutor::Init() {
  85. data_inputer_ = std::unique_ptr<DataInputer>(new(std::nothrow) DataInputer());
  86. GE_CHECK_NOTNULL(data_inputer_);
  87. GE_CHK_RT_RET(rtStreamCreate(&stream_, RT_STREAM_PRIORITY_DEFAULT));
  88. executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
  89. GE_CHECK_NOTNULL(executor_);
  90. GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
  91. GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
  92. GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
  93. if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
  94. pipe_executor_ =
  95. std::unique_ptr<HybridModelPipelineExecutor>(new(std::nothrow) HybridModelPipelineExecutor(model_, device_id_));
  96. GE_CHECK_NOTNULL(pipe_executor_);
  97. GE_CHK_STATUS_RET(pipe_executor_->Init(), "Failed to init hybrid engine");
  98. }
  99. GE_CHK_STATUS_RET(InitInputDesc(), "Failed to init input tensors");
  100. return SUCCESS;
  101. }
  102. Status HybridModelAsyncExecutor::PreRun(InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
  103. GE_CHK_STATUS_RET(SyncVarData(), "Failed to sync var data");
  104. RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[SyncVarData] End");
  105. GE_CHK_STATUS_RET(PrepareInputs(current_data, args), "Failed to copy input data to model");
  106. RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[CopyInputData] End");
  107. return SUCCESS;
  108. }
  109. Status HybridModelAsyncExecutor::RunInternal() {
  110. auto device_id = static_cast<int32_t>(device_id_);
  111. GELOGD("Hybrid model start. model_id = %u, device_id = %u", model_id_, device_id_);
  112. GE_CHK_RT_RET(rtSetDevice(device_id));
  113. // DeviceReset before thread run finished!
  114. GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(device_id)); });
  115. while (run_flag_) {
  116. std::shared_ptr<InputDataWrapper> data_wrapper;
  117. Status ret = data_inputer_->Pop(data_wrapper);
  118. if (data_wrapper == nullptr || ret != SUCCESS) {
  119. GELOGI("data_wrapper is null!, ret = %u", ret);
  120. continue;
  121. }
  122. GELOGI("Getting the input data, model_id:%u", model_id_);
  123. GE_IF_BOOL_EXEC(!run_flag_, break);
  124. InputData current_data = data_wrapper->GetInput();
  125. GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id_, current_data.index);
  126. RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] Start", iterator_count_);
  127. HybridModelExecutor::ExecuteArgs args;
  128. ret = PreRun(current_data, args);
  129. GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
  130. ret != SUCCESS, (void) HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
  131. CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
  132. continue, "PreRun failed."); // [No need to check value]
  133. if (pipe_executor_ != nullptr) {
  134. GELOGI("HybridModel will execute in pipeline mode");
  135. auto iter_per_run = std::getenv("ITER_NUM");
  136. if (iter_per_run) {
  137. args.num_loops = static_cast<int>(strtol(iter_per_run, nullptr, kDefaultLoopCount));
  138. }
  139. ret = pipe_executor_->Execute(args);
  140. } else {
  141. GELOGI("HybridModel will execute in singleline mode");
  142. ge::GetContext().SetSessionId(executor_->GetContext()->session_id);
  143. ret = executor_->Execute(args);
  144. }
  145. ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
  146. if (ret != SUCCESS) {
  147. CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_EXEC);
  148. continue;
  149. }
  150. RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] End", iterator_count_);
  151. iterator_count_++;
  152. GELOGI("run iterator count is %lu", iterator_count_);
  153. }
  154. CsaInteract::GetInstance().WriteInternalErrorCode();
  155. GELOGI("Model run end, model id:%u", model_id_);
  156. return SUCCESS;
  157. }
  158. Status HybridModelAsyncExecutor::HandleResult(Status exec_ret,
  159. uint32_t data_id,
  160. HybridModelExecutor::ExecuteArgs &args,
  161. OutputData *output_data) {
  162. GELOGD("Start to handle result. model id = %u, data index = %u, execution ret = %u", model_id_, data_id, exec_ret);
  163. std::vector<ge::OutputTensorInfo> output_tensor_info_list;
  164. if (args.is_eos) {
  165. GELOGI("End of sequence, model id = %u", model_id_);
  166. GE_CHK_STATUS_RET_NOLOG(OnComputeDone(data_id, END_OF_SEQUENCE, output_tensor_info_list));
  167. return SUCCESS;
  168. }
  169. if (exec_ret != SUCCESS) {
  170. GELOGE(exec_ret, "Failed to execute graph. model_id = %u", model_id_);
  171. return OnComputeDone(data_id, INTERNAL_ERROR, output_tensor_info_list);
  172. }
  173. GE_CHECK_NOTNULL(output_data);
  174. auto ret = CopyOutputs(args, output_data, output_tensor_info_list);
  175. if (ret != SUCCESS) {
  176. OnComputeDone(data_id, INTERNAL_ERROR, output_tensor_info_list);
  177. return INTERNAL_ERROR;
  178. }
  179. GELOGD("Executed graph successfully, model id = %u, data_index = %u", model_id_, data_id);
  180. return OnComputeDone(data_id, SUCCESS, output_tensor_info_list);
  181. }
  182. Status HybridModelAsyncExecutor::SyncVarData() {
  183. GELOGI("Sync var data, model id:%u", model_id_);
  184. TensorValue *global_step_var = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
  185. if (global_step_var != nullptr) {
  186. std::vector<uint64_t> v_step;
  187. v_step.push_back(iterator_count_);
  188. GE_CHK_RT_RET(rtMemcpy(global_step_var->MutableData(),
  189. global_step_var->GetSize(),
  190. v_step.data(),
  191. v_step.size() * sizeof(uint64_t),
  192. RT_MEMCPY_HOST_TO_DEVICE));
  193. } else {
  194. GELOGD("No GLOBAL_STEP variable was found.");
  195. }
  196. return SUCCESS;
  197. }
  198. Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
  199. if (current_data.blobs.size() < input_tensor_desc_.size()) {
  200. GELOGE(PARAM_INVALID, "Blob size mismatches, expect at least %zu, but got %zu",
  201. input_tensor_desc_.size(), current_data.blobs.size());
  202. return PARAM_INVALID;
  203. }
  204. auto allocator = NpuMemoryAllocator::GetAllocator(device_id_);
  205. GE_CHECK_NOTNULL(allocator);
  206. args.input_desc.resize(input_tensor_desc_.size());
  207. const std::vector<DataBuffer> &blobs = current_data.blobs;
  208. for (size_t input_index = 0; input_index < input_tensor_desc_.size(); ++input_index) {
  209. auto tensor_size = input_sizes_[input_index];
  210. if (is_input_dynamic_[input_index]) {
  211. if (input_index >= current_data.shapes.size()) {
  212. GELOGE(PARAM_INVALID, "Shape index out of range, index = %zu, shape size = %zu",
  213. input_index, current_data.shapes.size());
  214. return PARAM_INVALID;
  215. }
  216. auto &tensor_desc = input_tensor_desc_[input_index];
  217. GeShape shape(current_data.shapes[input_index]);
  218. std::vector<std::pair<int64_t, int64_t>> range;
  219. auto range_ret = tensor_desc->GetShapeRange(range);
  220. GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR,
  221. "Get shape range failed, ret=%u.", range_ret);
  222. for (size_t k = 0; k < range.size(); ++k) {
  223. if (k >= shape.GetDimNum()) {
  224. break;
  225. }
  226. // range[k].second can be -1
  227. if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) {
  228. GELOGE(PARAM_INVALID, "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld]",
  229. input_index, k, shape.GetDim(k), range[k].first, range[k].second);
  230. return PARAM_INVALID;
  231. }
  232. }
  233. tensor_desc->SetShape(shape);
  234. args.input_desc[input_index] = tensor_desc;
  235. GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
  236. GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
  237. "Failed to calc tensor size, index = %zu, shape = [%s]",
  238. input_index,
  239. tensor_desc->GetShape().ToString().c_str());
  240. GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
  241. }
  242. GE_CHECK_GE(tensor_size, 0);
  243. AllocationAttr attr;
  244. if (GetContext().GetHostExecFlag()) {
  245. attr.SetMemType(HOST_DDR);
  246. }
  247. auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size, &attr);
  248. GE_CHECK_NOTNULL(tensor_buffer);
  249. args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release()));
  250. GELOGD("To copy input data for input[%zu]", input_index);
  251. const DataBuffer &data_buf = blobs[input_index];
  252. auto mem_size = static_cast<uint64_t>(tensor_size);
  253. GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length,
  254. PARAM_INVALID,
  255. "input data size(%lu) does not match model required size(%lu), ret failed.",
  256. data_buf.length,
  257. mem_size);
  258. GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]",
  259. model_->root_runtime_param_.graph_id,
  260. input_index,
  261. args.inputs[input_index].GetData(),
  262. mem_size,
  263. data_buf.length);
  264. GE_CHK_RT_RET(rtMemcpy(args.inputs[input_index].MutableData(),
  265. mem_size,
  266. data_buf.data,
  267. data_buf.length,
  268. RT_MEMCPY_HOST_TO_DEVICE));
  269. }
  270. return SUCCESS;
  271. }
  272. Status HybridModelAsyncExecutor::InitInputDesc() {
  273. int input_index = 0;
  274. for (const auto &input_node : model_->GetRootGraphItem()->GetInputNodes()) {
  275. GELOGD("Init input[%u], node = %s, is_dynamic = %d",
  276. input_index,
  277. input_node->NodeName().c_str(),
  278. input_node->is_dynamic);
  279. auto output_desc = input_node->MutableOutputDesc(kDataOutputIndex);
  280. GE_CHECK_NOTNULL(output_desc);
  281. int64_t tensor_size = -1;
  282. if (!input_node->is_dynamic) {
  283. GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*output_desc, tensor_size),
  284. "Failed to get size from %s",
  285. input_node->NodeName().c_str());
  286. if (tensor_size == 0) {
  287. GELOGW("[%s] Tensor size == 0", input_node->NodeName().c_str());
  288. GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*output_desc, tensor_size),
  289. "Failed to calc tensor size");
  290. GELOGD("[%s] Tensor size updated to %ld", input_node->NodeName().c_str(), tensor_size);
  291. }
  292. }
  293. input_sizes_.emplace(input_index, tensor_size);
  294. input_tensor_desc_.emplace(input_index, output_desc);
  295. is_input_dynamic_.push_back(input_node->is_dynamic);
  296. input_index += 1;
  297. }
  298. return SUCCESS;
  299. }
  300. Status HybridModelAsyncExecutor::OnComputeDone(uint32_t data_index, uint32_t result_code,
  301. std::vector<ge::OutputTensorInfo> &outputs) {
  302. GELOGD("OnComputeDone. model id = %u, data index = %u, execution ret = %u", model_id_, data_index, result_code);
  303. if (listener_ != nullptr) {
  304. GE_CHK_STATUS(listener_->OnComputeDone(model_id_, data_index, result_code, outputs),
  305. "OnComputeDone failed");
  306. }
  307. return result_code;
  308. }
  309. Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &args,
  310. OutputData *output_data,
  311. std::vector<ge::OutputTensorInfo> &outputs) {
  312. // copy output data from op to designated position
  313. std::vector<ConstGeTensorDescPtr> &output_tensor_desc_list = args.output_desc;
  314. std::vector<TensorValue> &output_tensors = args.outputs;
  315. if (output_tensor_desc_list.size() != output_tensors.size()) {
  316. GELOGE(INTERNAL_ERROR,
  317. "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu",
  318. output_tensor_desc_list.size(),
  319. output_tensors.size());
  320. return INTERNAL_ERROR;
  321. }
  322. GELOGD("Number of outputs = %zu", output_tensor_desc_list.size());
  323. for (size_t i = 0; i < output_tensors.size(); ++i) {
  324. GELOGD("Start to process output[%zu]", i);
  325. auto &output_tensor = output_tensors[i];
  326. auto &tensor_desc = output_tensor_desc_list.at(i);
  327. GE_CHECK_NOTNULL(tensor_desc);
  328. int64_t output_size = -1;
  329. GE_CHK_GRAPH_STATUS_RET(TensorUtils::CalcTensorMemSize(tensor_desc->GetShape(),
  330. tensor_desc->GetFormat(),
  331. tensor_desc->GetDataType(),
  332. output_size),
  333. "Failed to calc tensor size for output[%zu]. shape = [%s], type = %s, format = %s",
  334. i,
  335. tensor_desc->GetShape().ToString().c_str(),
  336. TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
  337. TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str());
  338. GELOGD("Got tensor size for output[%zu] successfully. shape = [%s], type = %s, format = %s, size = %ld",
  339. i,
  340. tensor_desc->GetShape().ToString().c_str(),
  341. TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
  342. TypeUtils::FormatToSerialString(tensor_desc->GetFormat()).c_str(),
  343. output_size);
  344. GE_CHECK_GE(output_size, 0);
  345. GE_CHECK_LE(output_size, UINT32_MAX);
  346. if (output_tensor.GetSize() < static_cast<size_t>(output_size)) {
  347. GELOGE(INTERNAL_ERROR,
  348. "output[%zu] tensor size(%zu) is not enough for output shape [%s]",
  349. i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str());
  350. return INTERNAL_ERROR;
  351. }
  352. ge::OutputTensorInfo output;
  353. output.data_type = static_cast<uint32_t>(tensor_desc->GetDataType());
  354. output.dims = tensor_desc->GetShape().GetDims();
  355. output.length = output_size;
  356. if (output_size > 0) {
  357. std::unique_ptr<uint8_t[]> data_buf(new(std::nothrow) uint8_t[output_size]);
  358. GE_CHECK_NOTNULL(data_buf);
  359. GE_CHK_RT_RET(rtMemcpy(data_buf.get(),
  360. output_size,
  361. output_tensor.GetData(),
  362. output_size,
  363. RT_MEMCPY_DEVICE_TO_HOST));
  364. output.data = std::move(data_buf);
  365. output_data->blobs.emplace_back(data_buf.get(), static_cast<uint32_t>(output_size), false);
  366. } else {
  367. GELOGW("Output[%zu] is empty. shape = [%s]", i, tensor_desc->GetShape().ToString().c_str());
  368. output.data = nullptr;
  369. output_data->blobs.emplace_back(nullptr, 0U, false);
  370. }
  371. outputs.emplace_back(std::move(output));
  372. GELOGD("Output[%zu] added, type = %s, shape = [%s], size = %ld",
  373. i,
  374. TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
  375. tensor_desc->GetShape().ToString().c_str(),
  376. output_size);
  377. }
  378. return SUCCESS;
  379. }
  380. Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
  381. const std::vector<GeTensorDesc> &input_desc,
  382. std::vector<DataBuffer> &outputs,
  383. std::vector<GeTensorDesc> &output_desc) {
  384. GELOGI("Start to execute model.");
  385. HybridModelExecutor::ExecuteArgs args;
  386. args.inputs.resize(inputs.size());
  387. for (size_t i = 0; i < inputs.size(); ++i) {
  388. TensorValue tensor_value(inputs[i].data, inputs[i].length);
  389. args.inputs[i] = tensor_value;
  390. }
  391. for (size_t i = 0; i < outputs.size(); ++i) {
  392. args.outputs.emplace_back(TensorValue(outputs[i].data, outputs[i].length));
  393. }
  394. // usr must designate input tensorDesc when input shape is dynamic in inference
  395. for (size_t i = 0; i < input_desc.size(); ++i) {
  396. ConstGeTensorDescPtr tensor_desc_ptr = MakeShared<GeTensorDesc>(input_desc[i]);
  397. args.input_desc.emplace_back(tensor_desc_ptr);
  398. }
  399. GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
  400. for (const auto &output_tensor_desc : args.output_desc) {
  401. output_desc.emplace_back(*output_tensor_desc);
  402. }
  403. return SUCCESS;
  404. }
  405. Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
  406. GELOGD("Start to execute model.");
  407. // prepare inputs
  408. InputData input_data;
  409. for (auto &tensor : inputs) {
  410. DataBuffer buffer;
  411. buffer.data = const_cast<uint8_t *>(tensor.GetData().GetData());
  412. buffer.length = tensor.GetData().size();
  413. input_data.blobs.emplace_back(buffer);
  414. input_data.shapes.emplace_back(tensor.GetTensorDesc().GetShape().GetDims());
  415. }
  416. HybridModelExecutor::ExecuteArgs args;
  417. GE_CHK_STATUS_RET(PrepareInputs(input_data, args), "Failed to copy input data to model");
  418. GELOGD("Done copying input data successfully.");
  419. GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
  420. std::vector<ge::OutputTensorInfo> output_tensor_info_list;
  421. OutputData output_data;
  422. GE_CHK_STATUS_RET(CopyOutputs(args, &output_data, output_tensor_info_list), "Failed to copy outputs.");
  423. GELOGD("Done copying output data successfully. output count = %zu", output_tensor_info_list.size());
  424. int out_index = 0;
  425. outputs.resize(output_tensor_info_list.size());
  426. for (auto &out_tensor_info : output_tensor_info_list) {
  427. auto &ge_tensor = outputs[out_index];
  428. if (out_tensor_info.length > 0) {
  429. GE_CHK_GRAPH_STATUS_RET(ge_tensor.SetData(out_tensor_info.data.get(), out_tensor_info.length),
  430. "Failed to set output[%d].", out_index);
  431. }
  432. ge_tensor.MutableTensorDesc() = *args.output_desc[out_index];
  433. GELOGD("Set output[%d], tensor size = %ld, shape = [%s]",
  434. out_index,
  435. out_tensor_info.length,
  436. ge_tensor.MutableTensorDesc().MutableShape().ToString().c_str());
  437. ++out_index;
  438. }
  439. return SUCCESS;
  440. }
  441. Status HybridModelAsyncExecutor::DumpOpDebug() {
  442. const DumpProperties &dump_properties = executor_->GetContext()->dump_properties;
  443. if (dump_properties.IsOpDebugOpen()) {
  444. GELOGD("Opdebug is open in hybrid engine");
  445. uint32_t op_debug_mode = dump_properties.GetOpDebugMode();
  446. GE_CHK_RT_RET(op_debug_register_.RegisterDebugForStream(stream_, op_debug_mode, data_dumper_));
  447. is_op_debug_reg_ = true;
  448. data_dumper_.SetDumpProperties(dump_properties);
  449. data_dumper_.SetModelName(model_->GetModelName());
  450. data_dumper_.SetModelId(model_->GetModelId());
  451. data_dumper_.SetDeviceId(model_->GetDeviceId());
  452. void *global_step = nullptr;
  453. TensorValue *varible_global_step = model_->GetVariable(NODE_NAME_GLOBAL_STEP);
  454. if (varible_global_step != nullptr) {
  455. global_step = const_cast<void *>(varible_global_step->GetData());
  456. }
  457. void *loop_per_iter = nullptr;
  458. TensorValue *varible_loop_per_iter = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
  459. if (varible_loop_per_iter != nullptr) {
  460. loop_per_iter = const_cast<void *>(varible_loop_per_iter->GetData());
  461. }
  462. void *loop_cond = nullptr;
  463. TensorValue *varible_loop_cond = model_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_COND);
  464. if (varible_loop_cond != nullptr) {
  465. loop_cond = const_cast<void *>(varible_loop_cond->GetData());
  466. }
  467. data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
  468. GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "LoadDumpInfo failed in hybrid engine");
  469. GELOGD("Dump op debug SUCCESS in hybrid engine");
  470. }
  471. return SUCCESS;
  472. }
  473. } // namespace hybrid
  474. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。