
graph_execute.cc 32 kB

/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "graph/execute/graph_execute.h"

#include <memory>
#include <string>

#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "common/profiling/profiling_manager.h"

namespace ge {
using Uint32Pair = pair<uint32_t, uint32_t>;
const uint32_t kInvalidModelId = UINT32_MAX;

GraphExecutor::GraphExecutor()
    : init_flag_(false),
      train_graph_flag_(false),
      sync_run_mutex_(nullptr),
      condition_(nullptr),
      graph_run_listener_(nullptr),
      last_graph_id_(UINT32_MAX),
      malloc_flag_(false) {}

GraphExecutor::~GraphExecutor() {
  outputs_desc_.clear();
  if (malloc_flag_) {
    for (auto &buffer_addr : buffer_addr_) {
      rtError_t rt_ret;
      rt_ret = rtFreeHost(buffer_addr);
      if (rt_ret != RT_ERROR_NONE) {
        REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X", rt_ret);
        GELOGE(RT_FAILED, "[Call][RtFreeHost] subgraph free buffer failed, ret: 0x%X", rt_ret);
      }
    }
  }
  malloc_flag_ = false;
  buffer_addr_.clear();
}

Status GraphExecutor::SetCondition(std::mutex *mutex, std::condition_variable *cond,
                                   std::shared_ptr<GraphModelListener> listener) {
  if (mutex == nullptr) {
    REPORT_INNER_ERROR("E19999", "Check param mutex nullptr");
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param mutex is nullptr.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  if (cond == nullptr) {
    REPORT_INNER_ERROR("E19999", "Check param cond nullptr");
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param cond is nullptr.");
    return GE_GRAPH_PARAM_NULLPTR;
  }
  if (listener == nullptr) {
    REPORT_INNER_ERROR("E19999", "Check param listener nullptr");
    GELOGE(GE_GRAPH_PARAM_NULLPTR, "[Check][Param] input param listener is nullptr.");
    return GE_GRAPH_PARAM_NULLPTR;
  }

  sync_run_mutex_ = mutex;
  condition_ = cond;
  graph_run_listener_ = listener;
  init_flag_ = true;
  return SUCCESS;
}

Status GraphExecutor::SetDynamicSize(uint32_t model_id, const std::vector<uint64_t> &batch_num, int32_t dynamic_type) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->SetDynamicSize(model_id, batch_num, dynamic_type);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Set][DynamicSize] failed, model_id:%u", model_id);
    return ret;
  }
  return SUCCESS;
}

void GraphExecutor::SetTrainFlag(bool is_train_graph) { train_graph_flag_ = is_train_graph; }

Status GraphExecutor::FreeInOutBuffer() {
  if (malloc_flag_) {
    for (auto iter = buffer_addr_.begin(); iter != buffer_addr_.end(); ++iter) {
      rtError_t rt_ret;
      rt_ret = rtFreeHost(*iter);
      if (rt_ret != RT_ERROR_NONE) {
        REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X", rt_ret);
        GELOGE(RT_FAILED, "[Call][RtFreeHost] subgraph free buffer failed, ret: 0x%X", rt_ret);
        (void)buffer_addr_.erase(buffer_addr_.begin(), iter);
        return GE_GRAPH_FREE_FAILED;
      }
    }
    buffer_addr_.clear();
    malloc_flag_ = false;
    return SUCCESS;
  } else {
    GELOGD("[GraphManager] not malloc buffer.");
    return SUCCESS;
  }
}

Status GraphExecutor::MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr) {
  if (malloc_flag_) {
    auto all_size_same = true;
    if (buffer_size.size() == buffer_size_.size()) {
      for (size_t i = 0; i < buffer_size.size(); i++) {
        if (buffer_size[i] != buffer_size_[i]) {
          all_size_same = false;
          break;
        }
      }
    } else {
      all_size_same = false;
    }
    if (all_size_same) {
      data_addr = buffer_addr_;
      return SUCCESS;
    }
    buffer_size_.clear();
    auto rt_ret = FreeInOutBuffer();
    if (rt_ret != SUCCESS) {
      GELOGE(RT_FAILED, "[Free][Buffer] failed, ret: 0x%X", rt_ret);
      return RT_FAILED;
    }
  }

  rtError_t rt_ret;
  for (size_t i = 0; i < buffer_size.size(); ++i) {
    void *tmp_buf = nullptr;
    rt_ret = rtMallocHost(&tmp_buf, buffer_size[i]);
    if (rt_ret != RT_ERROR_NONE) {
      REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%lu, ret:0x%X", buffer_size[i], rt_ret);
      GELOGE(RT_FAILED, "[Malloc][Buffer] failed, size:%lu, ret:0x%X", buffer_size[i], rt_ret);
      return GE_GRAPH_MALLOC_FAILED;
    }
    malloc_flag_ = true;
    data_addr.push_back(tmp_buf);
    buffer_addr_.push_back(tmp_buf);
  }
  buffer_size_ = buffer_size;
  return SUCCESS;
}

Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,
                                       OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc) {
  // Preprocessing input data
  graph_input_data.index = 0;
  graph_input_data.timeout = 0;
  graph_input_data.timestamp = 0;
  std::size_t inputSize = input_tensor.size();
  std::size_t output_size = output_desc.size();
  std::vector<uint64_t> bufferSizeVec;
  std::vector<void *> addrVec;

  for (std::size_t i = 0; i < inputSize; ++i) {
    const GeTensor *InTensor = &input_tensor[i];
    GE_CHECK_NOTNULL(InTensor);
    bufferSizeVec.push_back(InTensor->GetData().size());
  }

  for (const auto &desc : output_desc) {
    bufferSizeVec.push_back(desc.size);
  }

  Status ret = MallocInOutBuffer(bufferSizeVec, addrVec);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_MALLOC_FAILED, "[Malloc][Mem] failed");
    return GE_GRAPH_MALLOC_FAILED;
  }

  for (std::size_t i = 0; i < input_tensor.size() && i < addrVec.size(); ++i) {
    const GeTensor *in_tensor = &input_tensor[i];
    GE_CHECK_NOTNULL(in_tensor);
    if ((addrVec[i] != nullptr) && (in_tensor->GetData().data() != nullptr)) {
      rtError_t rt_ret = rtMemcpy(addrVec[i], bufferSizeVec[i], in_tensor->GetData().data(),
                                  in_tensor->GetData().size(), RT_MEMCPY_HOST_TO_HOST);
      if (rt_ret != RT_ERROR_NONE) {
        REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, dst_size:%lu, src_size:%zu, ret:0x%X",
                          bufferSizeVec[i], in_tensor->GetData().size(), rt_ret);
        GELOGE(RT_FAILED, "[Call][RtMemcpy] failed, dst_size:%lu, src_size:%zu, ret:0x%X",
               bufferSizeVec[i], in_tensor->GetData().size(), rt_ret);
        return RT_FAILED;
      }
    }

    DataBuffer in_data_buf;
    in_data_buf.data = reinterpret_cast<uint8_t *>(addrVec[i]);
    in_data_buf.length = in_tensor->GetData().size();
    in_data_buf.isDataSupportMemShare = false;
    graph_input_data.blobs.push_back(in_data_buf);
  }

  graph_output_data.index = 0;

  for (std::size_t j = 0; j < output_size; j++) {
    auto desc = output_desc[j];
    uint64_t buffer_size = desc.size;

    DataBuffer out_data_buf;
    out_data_buf.data = reinterpret_cast<uint8_t *>(addrVec[inputSize + j]);
    out_data_buf.length = buffer_size;
    out_data_buf.isDataSupportMemShare = false;
    graph_output_data.blobs.push_back(out_data_buf);
  }

  return SUCCESS;
}

Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
                                       std::vector<GeTensor> &output_tensor) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  if (model_manager->IsDynamicShape(model_id)) {
    GELOGI("[ExecuteGraph] GetInputOutputDescInfo via dynamic shape model executor, modelId=%u", model_id);
    return model_manager->SyncExecuteModel(model_id, input_tensor, output_tensor);
  }

  // Prepare input and output
  std::vector<InputOutputDescInfo> inputs_desc;
  std::vector<InputOutputDescInfo> output_desc;
  GELOGI("[ExecuteGraph] GetInputOutputDescInfo via new ome begin.");
  Status ret = GetInputOutputDescInfo(model_id, inputs_desc, output_desc);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_GET_IN_OUT_FAILED, "[Get][InputOutputDescInfo] failed, modelId=%u.", model_id);
    return GE_GRAPH_GET_IN_OUT_FAILED;
  }
  outputs_desc_.assign(output_desc.begin(), output_desc.end());

  InputData input_data;
  OutputData output_data;
  input_data.model_id = model_id;
  ret = PrepareInputData(input_tensor, input_data, output_data, output_desc);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_PREPARE_FAILED, "[Prepare][InputData] failed, modelId=%u.", model_id);
    return GE_GRAPH_PREPARE_FAILED;
  }

  if (graph_run_listener_->ResetResult() != SUCCESS) {
    REPORT_CALL_ERROR("E19999", "Call graph_run_listener_.ResetResult fail, model_id:%u", model_id);
    GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Reset][Result] failed, model_id:%u", model_id);
    return GE_GRAPH_EXECUTE_FAILED;
  }

  // Run mode async
  GELOGI("[ExecuteGraph] DataInput via new ome begin.");
  ret = DataInput(input_data, output_data);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_DATA_INPUT_FAILED, "[Call][DataInput] push data failed, modelId=%u.", model_id);
    return GE_GRAPH_DATA_INPUT_FAILED;
  }
  GELOGI("[GraphExecutor] input data push to wrapper finish, waiting for result...");

  // Pending until async execute graph complete
  {
    std::unique_lock<std::mutex> ulock(*sync_run_mutex_);
    if (!graph_run_listener_->IsFinished()) {
      (*condition_).wait(ulock);
    }

    // Run graph return
    uint32_t result_code = graph_run_listener_->GetResultCode();
    if (result_code != SUCCESS && result_code != END_OF_SEQUENCE) {
      REPORT_CALL_ERROR("E19999", "Graph_run_listener_ run fail, result:%u, model_id:%u", result_code, model_id);
      GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Execute][Model] failed, ret=%u, modelId=%u.", result_code, model_id);
      return GE_GRAPH_EXECUTE_FAILED;
    }
  }

  for (size_t i = 0; i < output_data.blobs.size(); ++i) {
    DataBuffer outputDataTmp = output_data.blobs[i];
    CHECK_FALSE_EXEC(outputDataTmp.length != 0,
                     REPORT_INNER_ERROR("E19999", "Param output_data.length is 0 in model:%u, check invalid",
                                        model_id);
                     GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Check][Param] Failed to allocate memory, "
                            "length is 0, model id:%u", model_id);
                     return GE_GRAPH_EXECUTE_FAILED);
    std::unique_ptr<uint8_t> outBufTmp(new (std::nothrow) uint8_t[outputDataTmp.length]);
    if (outBufTmp == nullptr) {
      REPORT_CALL_ERROR("E19999", "New output buffer fail, length:%lu, model:%u", outputDataTmp.length, model_id);
      GELOGE(FAILED, "[Allocate][Memory] failed, length:%lu, model:%u", outputDataTmp.length, model_id);
      return FAILED;
    }
    GE_PRINT_DYNAMIC_MEMORY(new, "the output memory of data on training.", sizeof(uint8_t) * outputDataTmp.length)
    rtError_t ret_value = rtMemcpy(outBufTmp.get(), outputDataTmp.length, outputDataTmp.data, outputDataTmp.length,
                                   RT_MEMCPY_HOST_TO_HOST);
    CHECK_FALSE_EXEC(ret_value == RT_ERROR_NONE,
                     REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, dst_size:%lu, src_size:%zu, ret:0x%X",
                                       outputDataTmp.length, outputDataTmp.length, ret_value);
                     GELOGE(GE_GRAPH_EXECUTE_FAILED, "[Call][RtMemcpy] failed, dst_size:%lu, src_size:%zu, ret:0x%X",
                            outputDataTmp.length, outputDataTmp.length, ret_value);
                     return GE_GRAPH_EXECUTE_FAILED);
    GeTensor outTensor;
    std::vector<int64_t> shapeDims;
    for (const auto &dim : output_desc[i].shape_info.dims) {
      shapeDims.push_back(dim);
    }

    GeShape outShape(shapeDims);
    outTensor.MutableTensorDesc().SetShape(outShape);
    outTensor.MutableTensorDesc().SetDataType((DataType)output_desc[i].data_type);
    (void)outTensor.SetData(outBufTmp.get(), outputDataTmp.length);
    output_tensor.push_back(outTensor);
  }

  GELOGI("[GraphExecutor] execute model success, modelId=%u.", model_id);
  return SUCCESS;
}

void GraphExecutor::InitModelIdInfo(std::vector<uint32_t> &out_model_id_info,
                                    std::vector<SubGraphInfoPtr> &sub_graph_vec, uint32_t output_size) {
  for (uint32_t i = 0; i < output_size; i++) {
    for (size_t j = 0; j < sub_graph_vec.size(); j++) {
      if (sub_graph_vec[j]->GetOutputFlag().size() == output_size && sub_graph_vec[j]->GetOutputFlag().at(i)) {
        out_model_id_info.push_back(sub_graph_vec[j]->GetModelIdInfo().model_id);
      }
    }
  }
}

Status GraphExecutor::FreeExecuteMemory() {
  auto ret = FreeInOutBuffer();
  if (ret != SUCCESS) {
    GELOGE(ret, "[Free][InOutBuffer] Error!");
    return ret;
  }

  return SUCCESS;
}

Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                                   const std::vector<GeTensor> &input_tensor, std::vector<GeTensor> &output_tensor) {
  if (graph_id != last_graph_id_) {
    auto ret = FreeExecuteMemory();
    if (ret != SUCCESS) {
      return ret;
    }
  }
  last_graph_id_ = graph_id;

  if (!init_flag_) {
    REPORT_INNER_ERROR("E19999", "No SetCondition called before, graph:%u, check invalid", graph_id);
    GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[Check][Param] AI Core Engine without calling SetCondition! graph id:%u",
           graph_id);
    return GE_GRAPH_EXECUTE_NOT_INIT;
  }
  GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
  Status ret = SyncExecuteModel(ge_root_model->GetModelId(), input_tensor, output_tensor);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[SyncExecute][Model] Error! graph id:%u", graph_id);
    return GE_GRAPH_SYNC_MODEL_FAILED;
  }

  ret = ModelSubscribe(graph_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Call][ModelSubscribe] failed, graph_id:%u", graph_id);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
                                        const std::vector<ge::Tensor> &input_tensor,
                                        const RunAsyncCallback &callback) {
  GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id);
  if (graph_id != last_graph_id_) {
    auto ret = FreeExecuteMemory();
    if (ret != SUCCESS) {
      return ret;
    }
  }
  last_graph_id_ = graph_id;
  GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
  Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback);
  if (ret != SUCCESS) {
    GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[AsyncExecute][Model] Error! graph id:%u", graph_id);
    return GE_GRAPH_SYNC_MODEL_FAILED;
  }

  GELOGI("[GraphExecutor] Async execute graph success, graph_id=%u", graph_id);
  return SUCCESS;
}

Status GraphExecutor::GetExecuteData(const std::vector<GeTensor> &input_tensor, std::vector<DataBuffer> &blobs,
                                     std::vector<GeTensorDesc> &tensor_desc) {
  for (const auto &tensor : input_tensor) {
    DataBuffer in_data_buf;
    // check placement
    in_data_buf.data = const_cast<uint8_t *>(tensor.GetData().data());
    in_data_buf.length = tensor.GetData().size();
    in_data_buf.isDataSupportMemShare = false;
    blobs.emplace_back(in_data_buf);
    tensor_desc.emplace_back(tensor.GetTensorDesc());
  }
  return SUCCESS;
}

Status GraphExecutor::ExecuteGraphWithStream(GraphId graph_id,
                                             rtStream_t stream,
                                             const GeRootModelPtr &ge_root_model,
                                             const std::vector<GeTensor> &input_tensor,
                                             std::vector<GeTensor> &output_tensor) {
  GELOGI("[GraphExecutor] Start to execute graph with stream, graph id = %u, stream = %p.", graph_id, stream);
  if (!init_flag_) {
    REPORT_INNER_ERROR("E19999", "No SetCondition called before, graph id = %u, stream = %p, check invalid.",
                       graph_id, stream);
    GELOGE(GE_GRAPH_EXECUTE_NOT_INIT, "[Check][Param] AI Core Engine without calling SetCondition! graph id = %u",
           graph_id);
    return GE_GRAPH_EXECUTE_NOT_INIT;
  }

  if (graph_id != last_graph_id_) {
    auto ret = FreeExecuteMemory();
    if (ret != SUCCESS) {
      return ret;
    }
  }
  last_graph_id_ = graph_id;
  GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
  auto model_id = ge_root_model->GetModelId();
  InputData input_data;
  input_data.index = 0;
  input_data.model_id = model_id;
  std::vector<GeTensorDesc> input_desc;
  auto ret = GetExecuteData(input_tensor, input_data.blobs, input_desc);
  if (ret != SUCCESS) {
    return ret;
  }

  OutputData output_data;
  output_data.index = 0;
  output_data.model_id = model_id;
  std::vector<GeTensorDesc> output_desc;
  ret = GetExecuteData(output_tensor, output_data.blobs, output_desc);
  if (ret != SUCCESS) {
    return ret;
  }

  auto async_mode = true;
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
  if (ret != SUCCESS) {
    return ret;
  }

  GELOGI("[GraphExecutor] Async execute graph with stream success graph id = %u, stream = %p.", graph_id, stream);
  return SUCCESS;
}

bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) {
  return lhs.second < rhs.second;
}

uint32_t GraphExecutor::GetExecuteModelId(const GeRootModelPtr &ge_root_model) {
  std::vector<uint32_t> model_ids = ge_root_model->GetAllModelId();
  if (model_ids.empty()) {
    return kInvalidModelId;
  }
  if (model_ids.size() == 1) {
    return ge_root_model->GetModelId();
  }
  std::vector<Uint32Pair> model_id_to_loads;
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  for (auto model_id : model_ids) {
    auto davinci_model = model_manager->GetModel(model_id);
    auto hybrid_model = model_manager->GetHybridModel(model_id);
    if (hybrid_model == nullptr) {
      GE_CHECK_NOTNULL(davinci_model);
    }
    uint32_t input_load = hybrid_model != nullptr ? hybrid_model->GetDataInputerSize() :
                                                    davinci_model->GetDataInputerSize();
    uint32_t running_load = hybrid_model != nullptr ? static_cast<uint32_t>(hybrid_model->GetRunningFlag()) :
                                                      static_cast<uint32_t>(davinci_model->GetRunningFlag());
    uint32_t load = input_load + running_load;
    if (load == 0) {
      return model_id;
    }
    model_id_to_loads.emplace_back(model_id, load);
  }
  sort(model_id_to_loads.begin(), model_id_to_loads.end(), CompareByLoad);
  if (model_id_to_loads.empty()) {
    return kInvalidModelId;
  }
  return model_id_to_loads.begin()->first;
}

Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
                                  const RunAsyncCallback &callback) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  if (model_manager->IsNeedHybridLoad(*ge_root_model)) {
    auto model = model_manager->GetHybridModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "[Set][RunAsyncListenerCallback] failed, model_id %u", model_id);
      return FAILED;
    }
  } else {
    auto model = model_manager->GetModel(model_id);
    GE_CHECK_NOTNULL(model);
    if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
      GELOGE(FAILED, "[Set][RunAsyncListenerCallback] failed, model_id %u", model_id);
      return FAILED;
    }
  }
  return SUCCESS;
}

Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<ge::Tensor> &inputs,
                                        const RunAsyncCallback &callback) {
  uint32_t model_id = GetExecuteModelId(ge_root_model);
  if (model_id == kInvalidModelId) {
    GELOGE(INTERNAL_ERROR, "No valid model id.");
    return INTERNAL_ERROR;
  }
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    GELOGI("RunAsync begin.model_id %u", model_id);
    if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) {
      GELOGE(FAILED, "[Set][CallBack] for model fail, model_id %u", model_id);
      return FAILED;
    }

    Status ret = model_manager->DataInputTensor(model_id, inputs);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Call][DataInputTensor] RunAsync: DataInput fail, model_id %u", model_id);
      return ret;
    }

    GELOGI("RunAsync success.");
  } catch (std::bad_alloc &) {
    REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id %u", model_id);
    GELOGE(MEMALLOC_FAILED, "[Run][Async] failed, bad memory allocation occur, model_id %u", model_id);
    return MEMALLOC_FAILED;
  } catch (...) {
    REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id %u", model_id);
    GELOGE(FAILED, "[Run][Async] failed, some exceptions occur, model_id %u", model_id);
    return FAILED;
  }

  return SUCCESS;
}

Status GraphExecutor::DataInput(const InputData &input_data, OutputData &output_data) {
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    Status ret = model_manager->DataInput(input_data, output_data);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Call][DataInput] failed.");
      return ret;
    }
  } catch (std::bad_alloc &) {
    REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed");
    GELOGE(MEMALLOC_FAILED, "[Call][DataInput] failed, bad memory allocation occur !");
    return MEMALLOC_FAILED;
  } catch (...) {
    REPORT_INNER_ERROR("E19999", "Some exceptions occur failed");
    GELOGE(FAILED, "[Call][DataInput] failed, some exceptions occur !");
    return FAILED;
  }

  return SUCCESS;
}

Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
                                             vector<InputOutputDescInfo> &output_desc) {
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Get][InputOutputDescInfo] failed, model_id:%u.", model_id);
      return ret;
    }
  } catch (std::bad_alloc &) {
    REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id:%u.", model_id);
    GELOGE(MEMALLOC_FAILED, "[Get][InputOutputDescInfo] failed, bad memory allocation occur, model_id:%u.", model_id);
    return MEMALLOC_FAILED;
  } catch (...) {
    REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id:%u.", model_id);
    GELOGE(FAILED, "[Get][InputOutputDescInfo] failed, some exceptions occur, model_id:%u.", model_id);
    return FAILED;
  }

  return SUCCESS;
}

Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
                                             vector<InputOutputDescInfo> &output_desc,
                                             std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats,
                                             bool new_model_desc) {
  try {
    auto model_manager = ge::ModelManager::GetInstance();
    GE_CHECK_NOTNULL(model_manager);
    Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats,
                                                       new_model_desc);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Get][InputOutputDescInfo] failed, model_id:%u.", model_id);
      return ret;
    }
  } catch (std::bad_alloc &) {
    REPORT_INNER_ERROR("E19999", "Bad memory allocation exception occur failed, model_id:%u.", model_id);
    GELOGE(MEMALLOC_FAILED, "[Get][InputOutputDescInfo] failed, bad memory allocation occur, model_id:%u.", model_id);
    return MEMALLOC_FAILED;
  } catch (...) {
    REPORT_INNER_ERROR("E19999", "Some exceptions occur failed, model_id:%u.", model_id);
    GELOGE(FAILED, "[Get][InputOutputDescInfo] failed, some exceptions occur, model_id:%u.", model_id);
    return FAILED;
  }

  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get dynamic batch_info
/// @param [in] model_id
/// @param [out] batch_info
/// @param [out] dynamic_type
/// @return execute result
///
Status GraphExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
                                          int32_t &dynamic_type) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetDynamicBatchInfo(model_id, batch_info, dynamic_type);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][DynamicBatchInfo] failed, model_id:%u.", model_id);
    return ret;
  }
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get combined dynamic dims info
/// @param [in] model_id
/// @param [out] batch_info
/// @return execute result
///
Status GraphExecutor::GetCombinedDynamicDims(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetCombinedDynamicDims(model_id, batch_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Call][GetCombinedDynamicDims] failed, model_id:%u.", model_id);
    return ret;
  }
  return SUCCESS;
}

///
/// @ingroup ge
/// @brief Get user designate shape order
/// @param [in] model_id
/// @param [out] user_input_shape_order
/// @return execute result
///
ge::Status GraphExecutor::GetUserDesignateShapeOrder(uint32_t model_id,
                                                     std::vector<std::string> &user_input_shape_order) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetUserDesignateShapeOrder(model_id, user_input_shape_order);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][UserDesignateShapeOrder] failed, model_id:%u.", model_id);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetCurShape(model_id, batch_info, dynamic_type);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][CurShape] failed, model_id:%u", model_id);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetOpAttr(uint32_t model_id, const std::string &op_name, const std::string &attr_name,
                                std::string &attr_value) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetOpAttr(model_id, op_name, attr_name, attr_value);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][OpAttr]Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str());
    REPORT_CALL_ERROR("E19999", "Get op:%s attr:%s failed.", op_name.c_str(), attr_name.c_str());
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetModelAttr(model_id, dynamic_output_shape_info);
  if (ret != SUCCESS) {
    GELOGE(FAILED, "[Get][ModelAttr] failed, model_id:%u", model_id);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetAippInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetAippInfo(model_id, index, aipp_info);
  if (ret != SUCCESS) {
    GELOGW("GetAIPPInfo is not success.");
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetAippType(model_id, index, type, aipp_index);
  if (ret != SUCCESS) {
    GELOGW("Get aipp type is not success.");
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetOrigInputInfo(model_id, index, orig_input_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][OrigInputInfo] failed, model_id:%u, index:%u.", model_id, index);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index,
                                                std::vector<InputOutputDims> &input_dims,
                                                std::vector<InputOutputDims> &output_dims) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetAllAippInputOutputDims(model_id, index, input_dims, output_dims);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][AllAippInputOutputDims] failed, model_id:%u, index:%u.", model_id, index);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id,
                                    OpDescInfo &op_desc_info) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  Status ret = model_manager->GetOpDescInfo(device_id, stream_id, task_id, op_desc_info);
  if (ret != SUCCESS) {
    GELOGE(ret, "[Get][OpDescInfo] failed, device_id:%u, stream_id:%u, task_id:%u.",
           device_id, stream_id, task_id);
    return ret;
  }
  return SUCCESS;
}

Status GraphExecutor::GetModelByID(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model) {
  auto model_manager = ge::ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  davinci_model = model_manager->GetModel(static_cast<uint32_t>(model_id));
  if (davinci_model == nullptr) {
    REPORT_INNER_ERROR("E19999", "GetModel from model_manager fail, model_id:%u", model_id);
    GELOGE(ge::FAILED, "[Get][Model] failed, Model id:%u is invalid or model is not loaded.", model_id);
    return ge::FAILED;
  }
  return ge::SUCCESS;
}

Status GraphExecutor::ModelSubscribe(uint32_t graph_id) {
  auto &profiling_manager = ProfilingManager::Instance();
  const auto &subcribe_info = profiling_manager.GetSubscribeInfo();
  if (subcribe_info.is_subscribe) {
    std::shared_ptr<DavinciModel> davinci_model = nullptr;
    uint32_t model_id = 0;
    Status ret = profiling_manager.GetModelIdFromGraph(graph_id, model_id);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Call][GetModelIdFromGraph] failed, graph_id:%u", graph_id);
      return ret;
    }
    ret = GetModelByID(model_id, davinci_model);
    if (ret != SUCCESS) {
      GELOGE(ret, "[Call][GetModelByID] failed, model_id:%u", model_id);
      return ret;
    }
    ret = profiling_manager.ProfModelSubscribe(subcribe_info.prof_switch, davinci_model.get());
    if (ret != SUCCESS) {
      GELOGE(ret, "[Call][ProfModelSubscribe] failed");
      return ret;
    }
  }
  return SUCCESS;
}
} // namespace ge
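
The synchronous path in this file only works if SetCondition() is called before ExecuteGraph(): the mutex, condition variable, and GraphModelListener it installs are what SyncExecuteModel() waits on until the asynchronous DataInput() completes. The sketch below is a minimal illustration of that calling contract, not code from this repository; it assumes the GeRootModelPtr and tensors come from an earlier graph build/load step, and it assumes GraphModelListener is constructed from the same mutex/condition pair the executor is given (as the graph manager does).

// Sketch only: SetCondition -> ExecuteGraph contract. RunGraphSync and the
// listener construction are illustrative assumptions, not part of this file.
#include <condition_variable>
#include <memory>
#include <mutex>
#include <vector>

#include "graph/execute/graph_execute.h"

namespace {
ge::Status RunGraphSync(ge::GraphId graph_id, const ge::GeRootModelPtr &ge_root_model,
                        const std::vector<ge::GeTensor> &inputs, std::vector<ge::GeTensor> &outputs) {
  static std::mutex sync_run_mutex;
  static std::condition_variable condition;
  // Assumed listener form: it records the run result and signals the condition
  // variable that SyncExecuteModel() blocks on.
  auto listener = std::make_shared<ge::GraphModelListener>(sync_run_mutex, condition);

  ge::GraphExecutor executor;
  // Without this call ExecuteGraph() returns GE_GRAPH_EXECUTE_NOT_INIT.
  ge::Status ret = executor.SetCondition(&sync_run_mutex, &condition, listener);
  if (ret != ge::SUCCESS) {
    return ret;
  }
  executor.SetTrainFlag(false);
  // Pushes the inputs, waits for the listener to signal completion, then
  // copies the output buffers back into output tensors.
  return executor.ExecuteGraph(graph_id, ge_root_model, inputs, outputs);
}
}  // namespace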

The Graph Engine (GE) module is a submodule of MindSpore. It is implemented in C++ and sits between the front-end module ME and the underlying hardware, acting as the bridge between the two. GE takes the graph delivered by ME as input, applies a series of deep graph optimizations, and finally outputs a graph that can run efficiently on the underlying hardware. GE performs optimizations tailored to the hardware architecture of the Ascend AI processor in order to fully exploit the processor's compute power. During model training and inference, GE is invoked automatically and is transparent to the user. GE consists of two main parts, GE API and GE Core.
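
To make the split between GE API and GE Core concrete, the sketch below shows roughly how a client program drives GE through the public API declared in ge_api.h (GEInitialize, Session::AddGraph, Session::RunGraph, GEFinalize); inside RunGraph, GE Core eventually reaches execution code such as the GraphExecutor above. This is an assumption-based illustration: the graph construction step is omitted, the option maps are left empty, and in normal MindSpore training this sequence is issued by the framework rather than written by hand.

// Sketch of the GE API side, assuming a ge::Graph has already been built
// (e.g. converted from an ME graph); options are left empty for brevity.
#include <map>
#include <string>
#include <vector>

#include "ge/ge_api.h"

ge::Status RunOnAscend(const ge::Graph &graph, const std::vector<ge::Tensor> &inputs,
                       std::vector<ge::Tensor> &outputs) {
  std::map<std::string, std::string> options;
  if (ge::GEInitialize(options) != ge::SUCCESS) {
    return ge::FAILED;
  }
  {
    ge::Session session(options);
    const uint32_t graph_id = 0;
    // AddGraph hands the graph to GE Core, which optimizes and compiles it
    // for the Ascend AI processor.
    if (session.AddGraph(graph_id, graph) != ge::SUCCESS) {
      (void)ge::GEFinalize();
      return ge::FAILED;
    }
    // RunGraph executes the compiled graph and fills the output tensors.
    if (session.RunGraph(graph_id, inputs, outputs) != ge::SUCCESS) {
      (void)ge::GEFinalize();
      return ge::FAILED;
    }
  }  // Session is destroyed before GEFinalize().
  return ge::GEFinalize();
}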