You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

ge_api.cc 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
4 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "ge/ge_api.h"
  17. #include <iostream>
  18. #include <malloc.h>
  19. #include "common/debug/log.h"
  20. #include "framework/common/debug/ge_log.h"
  21. #include "common/ge/datatype_util.h"
  22. #include "proto/ge_api.pb.h"
  23. #include "graph/model_serialize.h"
  24. #include "graph/detail/model_serialize_imp.h"
  25. #include "graph/utils/tensor_adapter.h"
  26. #include "init/gelib.h"
  27. #include "session/session_manager.h"
  28. #include "graph/opsproto_manager.h"
  29. #include "graph/utils/type_utils.h"
  30. #include "graph/manager/util/rt_context_util.h"
  31. #include "graph/common/ge_call_wrapper.h"
  32. #include "register/op_registry.h"
  33. #include "common/ge/tbe_plugin_manager.h"
  34. using domi::OpRegistry;
  35. using std::map;
  36. using std::string;
  37. using std::vector;
  namespace {
  // Longest job id (in characters) that CheckOptionsValid accepts.
  constexpr int32_t kMaxStrLen{128};
  }  // namespace

  // Set by a successful GEInitialize and cleared again by a successful GEFinalize.
  static bool g_ge_initialized{false};
  // Shared by GEFinalize and ~Session so that their teardown work is serialized.
  static std::mutex g_ge_release_mutex;
  43. namespace ge {
  44. void GetOpsProtoPath(std::string &opsproto_path) {
  45. GELOGI("Enter get ops proto path schedule");
  46. const char *path_env = std::getenv("ASCEND_OPP_PATH");
  47. if (path_env != nullptr) {
  48. std::string path = path_env;
  49. opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
  50. GELOGI("Get opsproto so path from env: %s", path.c_str());
  51. return;
  52. }
  53. std::string path_base = PluginManager::GetPath();
  54. GELOGI("path_base is %s", path_base.c_str());
  55. path_base = path_base.substr(0, path_base.rfind('/'));
  56. path_base = path_base.substr(0, path_base.rfind('/') + 1);
  57. opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
  58. }
  59. Status CheckOptionsValid(const std::map<string, string> &options) {
  60. // check job_id is valid
  61. auto job_id_iter = options.find(OPTION_EXEC_JOB_ID);
  62. if (job_id_iter != options.end()) {
  63. if (job_id_iter->second.length() > kMaxStrLen) {
  64. GELOGE(PARAM_INVALID, "CheckOptionsValid job_id failed, string len > %d", kMaxStrLen);
  65. return FAILED;
  66. }
  67. }
  68. return SUCCESS;
  69. }
  70. // Initialize GE, prepare for execution, call GELib::Initialize
  71. Status GEInitialize(const std::map<string, string> &options) {
  72. GELOGT(TRACE_INIT, "GEInitialize start");
  73. // 0.check init status
  74. if (g_ge_initialized) {
  75. GELOGW("GEInitialize is called more than once");
  76. return SUCCESS;
  77. }
  78. // Load OpsProto lib plugin
  79. std::string opsproto_path;
  80. GetOpsProtoPath(opsproto_path);
  81. OpsProtoManager *manager = OpsProtoManager::Instance();
  82. std::map<string, string> option_tmp;
  83. option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
  84. GE_TIMESTAMP_START(GEInitialize);
  85. bool is_proto_init = manager->Initialize(option_tmp);
  86. GE_TIMESTAMP_END(GEInitialize, "GEInitialize::ManagerInitialize");
  87. if (!is_proto_init) {
  88. GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, ops proto path is invalid.");
  89. return FAILED;
  90. }
  91. // check options is valid
  92. GE_TIMESTAMP_START(CheckOptionsValid);
  93. if (CheckOptionsValid(options) != SUCCESS) {
  94. return FAILED;
  95. }
  96. GE_TIMESTAMP_END(CheckOptionsValid, "GEInitialize::CheckOptionsValid");
  97. GE_TIMESTAMP_START(InitPreparation);
  98. TBEPluginManager::Instance().InitPreparation(options);
  99. GE_TIMESTAMP_END(InitPreparation, "GEInitialize::InitPreparation");
  100. // call Initialize
  101. GELOGT(TRACE_RUNNING, "Initializing environment");
  102. GE_TIMESTAMP_START(GELibInitialize);
  103. Status ret = ge::GELib::Initialize(options);
  104. GE_TIMESTAMP_END(GELibInitialize, "GEInitialize::GELibInitialize");
  105. if (ret != SUCCESS) {
  106. GELOGE(GE_CLI_INIT_FAILED, "geInitialize failed, error code = %u", ret);
  107. return FAILED;
  108. }
  109. // 7.check return status, return
  110. if (!g_ge_initialized) {
  111. // Initialize success, first time calling initialize
  112. g_ge_initialized = true;
  113. }
  114. GELOGT(TRACE_STOP, "GEInitialize finished");
  115. return ret;
  116. }
  117. // GE finalize, releasing all resources
  118. Status GEFinalize() {
  119. GELOGT(TRACE_INIT, "GEFinalize start");
  120. // check init status
  121. if (!g_ge_initialized) {
  122. GELOGW("GEFinalize is called before GEInitialize");
  123. return SUCCESS;
  124. }
  125. std::lock_guard<std::mutex> lock(g_ge_release_mutex);
  126. // call Finalize
  127. Status ret = SUCCESS;
  128. Status middle_ret;
  129. GELOGT(TRACE_RUNNING, "Finalizing environment");
  130. std::shared_ptr<GELib> instancePtr = ge::GELib::GetInstance();
  131. if (instancePtr == nullptr || !instancePtr->InitFlag()) {
  132. GELOGW("GEFinalize Failed: GE not initialized.");
  133. ret = GE_CLI_GE_NOT_INITIALIZED;
  134. }
  135. if (ret != GE_CLI_GE_NOT_INITIALIZED) {
  136. middle_ret = instancePtr->Finalize();
  137. GELOGI("GEFinalize finalize gelib ret=%u", middle_ret);
  138. if (middle_ret != SUCCESS) {
  139. ret = middle_ret;
  140. }
  141. }
  142. middle_ret = TBEPluginManager::Instance().Finalize();
  143. if (middle_ret != SUCCESS) {
  144. ret = middle_ret;
  145. }
  146. if (g_ge_initialized && ret == SUCCESS) {
  147. // Unified destruct rt_context
  148. RtContextUtil::GetInstance().DestroyAllRtContexts();
  149. g_ge_initialized = false;
  150. }
  151. // to avoid memory fragment, use malloc_trim to back free stack to system
  152. malloc_trim(0);
  153. GELOGT(TRACE_STOP, "GEFinalize finished");
  154. return ret;
  155. }
  156. // Initialize session,which calls innerSession
  157. Session::Session(const std::map<string, string> &options) {
  158. GELOGT(TRACE_INIT, "Session Constructor start");
  159. // check init status
  160. sessionId_ = 0;
  161. if (!g_ge_initialized) {
  162. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized.");
  163. return;
  164. }
  165. // call Initialize
  166. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  167. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  168. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session Constructor failed");
  169. return;
  170. }
  171. GELOGT(TRACE_RUNNING, "Creating session");
  172. uint64_t session_id = 0;
  173. Status ret = instance_ptr->SessionManagerObj().CreateSession(options, session_id);
  174. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  175. // check return status, return, update session id if success
  176. if (ret == SUCCESS) {
  177. sessionId_ = session_id;
  178. } else {
  179. GELOGE(ret, "Session constructor failed, session Id not initialized");
  180. return;
  181. }
  182. GELOGT(TRACE_STOP, "Session Constructor finished");
  183. }
  184. // session destructor
  185. Session::~Session() {
  186. GELOGT(TRACE_INIT, "Session Destructor start");
  187. // 0.check init status
  188. if (!g_ge_initialized) {
  189. GELOGW("GE is not yet initialized or is finalized.");
  190. return;
  191. }
  192. Status ret = FAILED;
  193. std::lock_guard<std::mutex> lock(g_ge_release_mutex);
  194. try {
  195. uint64_t session_id = sessionId_;
  196. // call DestroySession
  197. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  198. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  199. GELOGW("GE is not yet initialized or is finalized.");
  200. return;
  201. }
  202. GELOGT(TRACE_RUNNING, "Session id is %lu", session_id);
  203. GELOGT(TRACE_RUNNING, "Destroying session");
  204. ret = instance_ptr->SessionManagerObj().DestroySession(session_id);
  205. } catch (google::protobuf::FatalException &e) {
  206. GELOGE(GE_CLI_SESS_DESTROY_FAILED, "SessionDestructor throws FatalException");
  207. }
  208. // check return status, return, update session id if success
  209. if (ret != SUCCESS) {
  210. GELOGE(ret, "Session Destructor failed");
  211. }
  212. GELOGT(TRACE_STOP, "Session Destructor finished");
  213. }
  214. Status Session::AddGraph(uint32_t graph_id, const Graph &graph) {
  215. std::map<std::string, std::string> options;
  216. return AddGraph(graph_id, graph, options);
  217. }
  218. Status Session::AddGraph(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options) {
  219. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
  220. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  221. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  222. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
  223. return FAILED;
  224. }
  225. GELOGD("Adding graph to session");
  226. Status ret = instance_ptr->SessionManagerObj().AddGraph(sessionId_, graph_id, graph, options);
  227. if (ret != SUCCESS) {
  228. GELOGE(ret, "AddGraph failed in Session.");
  229. return FAILED;
  230. }
  231. GELOGD("AddGraph finished in Session.");
  232. return ret;
  233. }
  234. Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph) {
  235. std::map<AscendString, AscendString> options;
  236. return AddGraphWithCopy(graph_id, graph, options);
  237. }
  238. Status Session::AddGraphWithCopy(uint32_t graph_id, const Graph &graph,
  239. const std::map<AscendString, AscendString> &options) {
  240. GELOGT(TRACE_INIT, "Start to add graph in Session. graph_id: %u, session_id: %lu.", graph_id, sessionId_);
  241. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  242. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  243. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "AddGraph failed in Session.");
  244. return FAILED;
  245. }
  246. std::map<std::string, std::string> str_options;
  247. for (auto it = options.begin(); it != options.end(); ++it) {
  248. str_options.insert({it->first.GetString(), it->second.GetString()});
  249. }
  250. GELOGD("Adding graph to session");
  251. Status ret = instance_ptr->SessionManagerObj().AddGraphWithCopy(sessionId_, graph_id, graph, str_options);
  252. if (ret != SUCCESS) {
  253. GELOGE(ret, "AddGraph failed in Session.");
  254. return FAILED;
  255. }
  256. GELOGD("AddGraph finished in Session.");
  257. return ret;
  258. }
  259. Status Session::RemoveGraph(uint32_t graph_id) {
  260. GELOGT(TRACE_INIT, "Session RemoveGraph start");
  261. // call RemoveGraph
  262. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  263. if (!instance_ptr || !instance_ptr->InitFlag()) {
  264. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RemoveGraph failed");
  265. return FAILED;
  266. }
  267. GELOGT(TRACE_RUNNING, "Removing Graph from session");
  268. Status ret = instance_ptr->SessionManagerObj().RemoveGraph(sessionId_, graph_id);
  269. // check return status, return
  270. if (ret != SUCCESS) {
  271. GELOGE(ret, "session RemoveGraph failed");
  272. return FAILED;
  273. }
  274. GELOGT(TRACE_STOP, "Session RemoveGraph finished");
  275. return ret;
  276. }
  277. void PrintOutputResult(std::vector<Tensor> &outputs) {
  278. if (outputs.empty() || outputs[0].GetData() == nullptr) {
  279. GELOGW("outputs is empty or data is nullptr.");
  280. return;
  281. }
  282. size_t out_buf_size = outputs[0].GetSize();
  283. TensorDesc desc(outputs[0].GetTensorDesc());
  284. DataType data_type = desc.GetDataType();
  285. auto iter = CONST_OPDATA_TYPE_SIZE_MAP.find(data_type);
  286. if (iter == CONST_OPDATA_TYPE_SIZE_MAP.end()) {
  287. GELOGI("DataType %s has not defined size", TypeUtils::DataTypeToSerialString(data_type).c_str());
  288. return;
  289. }
  290. size_t length = CONST_OPDATA_TYPE_SIZE_MAP[data_type];
  291. for (size_t i = 0; i < 10 && i < (out_buf_size / length); ++i) { // take first 10 at most
  292. switch (data_type) {
  293. case DT_BOOL:
  294. case DT_INT8:
  295. case DT_UINT8:
  296. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int8_t *>(outputs[0].GetData()) + i));
  297. break;
  298. case DT_INT16:
  299. case DT_UINT16:
  300. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int16_t *>(outputs[0].GetData()) + i));
  301. break;
  302. case DT_INT32:
  303. case DT_UINT32:
  304. GELOGI("output data[%zu]=%d", i, *(reinterpret_cast<int32_t *>(outputs[0].GetData()) + i));
  305. break;
  306. case DT_INT64:
  307. case DT_UINT64:
  308. GELOGI("output data[%zu]=%ld", i, *(reinterpret_cast<int64_t *>(outputs[0].GetData()) + i));
  309. break;
  310. case DT_FLOAT:
  311. GELOGI("output data[%zu]=%f", i, *(reinterpret_cast<float *>(outputs[0].GetData()) + i));
  312. break;
  313. case DT_DOUBLE:
  314. GELOGI("output data[%zu]=%lf", i, *(reinterpret_cast<double *>(outputs[0].GetData()) + i));
  315. break;
  316. default:
  317. GELOGI("Output datatype %s is not supported.", TypeUtils::DataTypeToSerialString(data_type).c_str());
  318. return;
  319. }
  320. }
  321. }
  322. Status Session::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inputs, std::vector<Tensor> &outputs) {
  323. GELOGT(TRACE_INIT, "Session RunGraph start");
  324. std::vector<Tensor> graph_inputs = inputs;
  325. // call RunGraph
  326. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  327. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  328. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Session RunGraph failed");
  329. return FAILED;
  330. }
  331. GELOGT(TRACE_RUNNING, "Running Graph");
  332. Status ret = instance_ptr->SessionManagerObj().RunGraph(sessionId_, graph_id, graph_inputs, outputs);
  333. // check return status
  334. if (ret != SUCCESS) {
  335. GELOGE(ret, "Session RunGraph failed");
  336. return FAILED;
  337. }
  338. // print output
  339. if (outputs.size() > 0) {
  340. PrintOutputResult(outputs);
  341. }
  342. // return
  343. GELOGT(TRACE_STOP, "Session RunGraph finished");
  344. return ret;
  345. }
  346. Status Session::RegisterCallBackFunc(const std::string &key, const pCallBackFunc &callback) {
  347. return ge::GELib::GetInstance()->SessionManagerObj().RegisterCallBackFunc(sessionId_, key, callback);
  348. }
  349. Status Session::BuildGraph(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
  350. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  351. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  352. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
  353. return FAILED;
  354. }
  355. GELOGT(TRACE_RUNNING, "Building Graph");
  356. Status ret = instance_ptr->SessionManagerObj().BuildGraph(sessionId_, graph_id, inputs);
  357. if (ret != SUCCESS) {
  358. GELOGE(ret, "Session BuildGraph failed");
  359. return FAILED;
  360. }
  361. return SUCCESS;
  362. }
  363. Status Session::RunGraphAsync(uint32_t graph_id, const std::vector<InputTensorInfo> &inputs,
  364. RunAsyncCallback callback) {
  365. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  366. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  367. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
  368. return FAILED;
  369. }
  370. GELOGT(TRACE_RUNNING, "Run Graph Asynchronously");
  371. GELOGW(
  372. "The callback function will not be checked. Please ensure that the implementation of the function is trusted.");
  373. Status ret = ge::GELib::GetInstance()->SessionManagerObj().RunGraphAsync(sessionId_, graph_id, inputs, callback);
  374. if (ret != SUCCESS) {
  375. GELOGE(ret, "SessionManager RunGraphAsync failed");
  376. return FAILED;
  377. }
  378. return SUCCESS;
  379. }
  380. Status Session::GetVariables(const std::vector<std::string> &var_names, std::vector<Tensor> &var_values) {
  381. auto instance_ptr = ge::GELib::GetInstance();
  382. if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
  383. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "SessionConstructor failed");
  384. return FAILED;
  385. }
  386. GELOGT(TRACE_RUNNING, "Get Variables");
  387. Status ret = ge::GELib::GetInstance()->SessionManagerObj().GetVariables(sessionId_, var_names, var_values);
  388. if (ret != SUCCESS) {
  389. GELOGE(ret, "SessionManager RunGraphAsync failed");
  390. return FAILED;
  391. }
  392. return SUCCESS;
  393. }
  394. bool Session::IsGraphNeedRebuild(uint32_t graph_id) {
  395. return ge::GELib::GetInstance()->SessionManagerObj().IsGraphNeedRebuild(sessionId_, graph_id);
  396. }
  397. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。