You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

dnnengine_manager.cc 15 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  #include "engine_manager/dnnengine_manager.h"

  #include <unistd.h>

  #include <cerrno>
  #include <cstdio>
  #include <cstring>
  #include <fstream>
  #include <map>
  #include <memory>
  #include <string>
  #include <utility>
  #include <vector>

  #include "common/debug/log.h"
  #include "common/ge/ge_util.h"
  #include "framework/common/debug/ge_log.h"
  #include "graph/ge_context.h"
  #include "init/gelib.h"
  namespace {
  // Keys read from each scheduler unit of the engine configuration json (see ParserJsonFile).
  constexpr const char *kSchedulerUnits = "schedule_units";
  constexpr const char *kId = "id";
  constexpr const char *kName = "name";
  constexpr const char *kExAttrs = "ex_attrs";
  constexpr const char *kCalEngines = "cal_engines";

  // Keys read from each engine entry of the "cal_engines" array (see ParserEngineMessage).
  constexpr const char *kIndependent = "independent";
  constexpr const char *kSkipAssignStream = "skip_assign_stream";
  // NOTE: the identifier spelling (kAttch) is kept because other code in this file refers to it.
  constexpr const char *kAttch = "attach";

  // Core type option value and engine names compared against in GetDNNEngineName.
  constexpr const char *kVectorCore = "VectorCore";
  constexpr const char *kVectorEngine = "VectorEngine";
  constexpr const char *kAIcoreEngine = "AIcoreEngine";

  // Boolean op attribute queried in GetDNNEngineName when a support check fails.
  constexpr const char *kCustomOpFlag = "_custom_op_flag";
  }  // namespace
  41. namespace ge {
  42. DNNEngineManager::DNNEngineManager() : init_flag_(false) {}
  43. DNNEngineManager::~DNNEngineManager() {
  44. engines_attrs_map_.clear();
  45. schedulers_.clear();
  46. }
  47. Status DNNEngineManager::Initialize(const std::map<std::string, std::string> &options) {
  48. // Multiple initializations are not supported
  49. if (init_flag_) {
  50. GELOGW("DNNEngineManager has been initialized.");
  51. return SUCCESS;
  52. }
  53. // Load engine so
  54. std::string so_path = "plugin/nnengine/";
  55. std::string path = PluginManager::GetPath();
  56. path.append(so_path);
  57. std::string so_api_func = "GetDNNEngineObjs";
  58. std::vector<std::string> so_func{so_api_func};
  59. Status status = plugin_mgr_.Load(path, so_func);
  60. if (status != SUCCESS) {
  61. GELOGE(status, "Load engine's so failed. LibPath is %s", path.c_str());
  62. return status;
  63. }
  64. status = plugin_mgr_.InvokeAll<std::map<std::string, DNNEnginePtr> &>(so_api_func, engines_map_);
  65. if (status != SUCCESS) {
  66. GELOGE(status, "Get DNNEngineObjs failed.");
  67. return status;
  68. }
  69. GELOGI("The number of DNNEngineObjs is %zu.", engines_map_.size());
  70. // Engines initialize
  71. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  72. if (iter->second == nullptr) {
  73. GELOGI("Engine: %s point to nullptr", (iter->first).c_str());
  74. continue;
  75. }
  76. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  77. status = iter->second->Initialize(options);
  78. if (status != SUCCESS) {
  79. GELOGE(status, "Engine: %s initialize failed.", (iter->first).c_str());
  80. return status;
  81. }
  82. // Check engines' attribute
  83. DNNEngineAttribute attrs;
  84. iter->second->GetAttributes(attrs);
  85. if (attrs.runtime_type == RuntimeType::DEVICE) {
  86. if ((attrs.mem_type.size()) != 1 || (attrs.mem_type[0] != GE_ENGINE_ATTR_MEM_TYPE_HBM)) {
  87. GELOGE(GE_ENG_MEMTYPE_ERROR, "Engine: %s in aicore, but the memory type is not HBM", (iter->first).c_str());
  88. return GE_ENG_MEMTYPE_ERROR;
  89. }
  90. }
  91. }
  92. status = ParserJsonFile();
  93. if (status != SUCCESS) {
  94. GELOGE(status, "parse json file failed");
  95. return status;
  96. }
  97. status = CheckJsonFile();
  98. if (status != SUCCESS) {
  99. GELOGE(status, "check json file failed");
  100. return status;
  101. }
  102. init_flag_ = true;
  103. return SUCCESS;
  104. }
  105. Status DNNEngineManager::Finalize() {
  106. // Finalize is not allowed, initialize first is necessary
  107. if (!init_flag_) {
  108. GELOGW("DNNEngineManager has been finalized.");
  109. return SUCCESS;
  110. }
  111. for (auto iter = engines_map_.begin(); iter != engines_map_.end(); ++iter) {
  112. if (iter->second != nullptr) {
  113. GELOGI("DNNEngine name: %s.", (iter->first).c_str());
  114. Status status = iter->second->Finalize();
  115. if (status != SUCCESS) {
  116. GELOGE(status, "Engine finalize failed.");
  117. return status;
  118. }
  119. }
  120. }
  121. init_flag_ = false;
  122. engines_map_.clear();
  123. return SUCCESS;
  124. }
  125. std::shared_ptr<ge::DNNEngine> DNNEngineManager::GetEngine(const std::string &name) const {
  126. auto iter = engines_map_.find(name);
  127. if (iter != engines_map_.end()) {
  128. return iter->second;
  129. }
  130. GELOGW("Failed to get engine object by engine name. %s.", name.c_str());
  131. return nullptr;
  132. }
  133. bool DNNEngineManager::IsEngineRegistered(const std::string &name) {
  134. auto iter = engines_map_.find(name);
  135. if (iter != engines_map_.end()) {
  136. return true;
  137. }
  138. GELOGW("Engine: %s is not Registered", name.c_str());
  139. return false;
  140. }
  141. std::string DNNEngineManager::GetDNNEngineName(const OpDescPtr &op_desc) {
  142. GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGE(GE_CLI_GE_NOT_INITIALIZED, "DNNEngineManager: op_desc is nullptr");
  143. return "");
  144. // Use the OpsKernelManager in GELib to get the opInfos for this opCode
  145. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  146. if ((instance_ptr == nullptr) || (!instance_ptr->InitFlag())) {
  147. GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GetDNNEngineName failed.");
  148. return "";
  149. }
  150. OpsKernelManager &ops_kernel_manager = instance_ptr->OpsKernelManagerObj();
  151. std::vector<OpInfo> op_infos = ops_kernel_manager.GetOpsKernelInfo(op_desc->GetType());
  152. if (op_infos.empty()) {
  153. GELOGI("DNNEngineManager: Can not get op info by op type %s", op_desc->GetType().c_str());
  154. return "";
  155. }
  156. string ge_core_type;
  157. Status ret = ge::GetContext().GetOption(ge::CORE_TYPE, ge_core_type);
  158. if (ret != SUCCESS) {
  159. GELOGD("get the option CORE_TYPE fail, set it to default value VECTOR_ENGINE");
  160. }
  161. string exclude_core_Type = (ge_core_type == kVectorCore) ? kAIcoreEngine : kVectorEngine;
  162. GELOGD("engine type will exclude: %s", exclude_core_Type.c_str());
  163. std::map<std::string, std::string> unsupported_reasons;
  164. for (const auto &it : op_infos) {
  165. if (it.engine == exclude_core_Type) {
  166. continue;
  167. }
  168. auto &kernel_map = ops_kernel_manager.GetAllOpsKernelInfoStores();
  169. auto &kernel_name = it.opKernelLib;
  170. auto kernel_info_store = kernel_map.find(kernel_name);
  171. if (kernel_info_store != kernel_map.end()) {
  172. std::string unsupported_reason;
  173. // It will be replaced by engine' checksupport
  174. if (kernel_info_store->second->CheckSupported(op_desc, unsupported_reason)) {
  175. op_desc->SetOpEngineName(it.engine);
  176. op_desc->SetOpKernelLibName(kernel_name);
  177. GELOGD("DNNEngineManager:Set OpKernelLibName %s and engine name %s into op_desc %s", kernel_name.c_str(),
  178. it.engine.c_str(), op_desc->GetName().c_str());
  179. return it.engine;
  180. } else {
  181. bool is_custom_op = false;
  182. if ((ge::AttrUtils::GetBool(op_desc, kCustomOpFlag, is_custom_op)) && is_custom_op) {
  183. GELOGE(FAILED,
  184. "The custom operator registered by the user does not support the logic function delivered by this "
  185. "network. Check support failed, kernel_name is %s, op type is %s, op name is %s",
  186. kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  187. return "";
  188. }
  189. unsupported_reasons.emplace(kernel_name, unsupported_reason);
  190. GELOGI("DNNEngineManager:Check support failed, kernel_name is %s, op type is %s, op name is %s",
  191. kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  192. }
  193. } else {
  194. GELOGW(
  195. "DNNEngineManager:Can not find any supported ops kernel info store by kernel_name %s,"
  196. "op type is %s, op name is %s",
  197. kernel_name.c_str(), op_desc->GetType().c_str(), op_desc->GetName().c_str());
  198. }
  199. }
  200. for (const auto &it : unsupported_reasons) {
  201. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "GetDNNEngineName:Op type %s of ops kernel %s is unsupported, reason:%s",
  202. op_desc->GetType().c_str(), it.first.c_str(), it.second.c_str());
  203. }
  204. GELOGE(GE_GRAPH_ASSIGN_ENGINE_FAILED, "Can't find any supported ops kernel and engine of %s, type is %s",
  205. op_desc->GetName().c_str(), op_desc->GetType().c_str());
  206. return "";
  207. }
  208. const std::map<std::string, SchedulerConf> &DNNEngineManager::GetSchedulers() const { return schedulers_; }
  209. Status DNNEngineManager::ParserJsonFile() {
  210. GELOGI("Begin to parser json file");
  211. std::string json_file_path = "plugin/nnengine/ge_config/engine_conf.json";
  212. std::string path = PluginManager::GetPath();
  213. path.append(json_file_path);
  214. nlohmann::json scheduler_json_file;
  215. Status status = ReadJsonFile(path, &scheduler_json_file);
  216. if (status != SUCCESS) {
  217. GELOGE(FAILED, "Read scheduler json file failed and the file path is %s", path.c_str());
  218. return FAILED;
  219. }
  220. if (scheduler_json_file.is_null()) {
  221. // when engine_conf.json is not exist, just return success
  222. GELOGW("Json file is null");
  223. return SUCCESS;
  224. }
  225. try {
  226. nlohmann::json scheduler_utils_json = scheduler_json_file[kSchedulerUnits];
  227. if (scheduler_utils_json.is_null()) {
  228. GELOGE(FAILED, "The message of scheduler units is not found");
  229. return FAILED;
  230. }
  231. if (!scheduler_utils_json.is_array()) {
  232. GELOGE(FAILED, "The message of kSchedulerUnits is not array and the file path is %s", json_file_path.c_str());
  233. return FAILED;
  234. }
  235. auto size = scheduler_json_file[kSchedulerUnits].size();
  236. for (size_t i = 0; i < size; i++) {
  237. SchedulerConf scheduler_conf;
  238. std::map<std::string, EngineConfPtr> engine_conf_map;
  239. nlohmann::json engines_json_map = scheduler_utils_json[i][kCalEngines];
  240. if (engines_json_map.is_null()) {
  241. GELOGE(FAILED, "The message of cal_engines is not found");
  242. return FAILED;
  243. }
  244. std::string scheduler_id_temp = scheduler_utils_json[i][kId];
  245. if (!scheduler_id_temp.empty()) {
  246. scheduler_conf.id = scheduler_id_temp;
  247. } else {
  248. GELOGE(FAILED, "Scheduler ID is null");
  249. return FAILED;
  250. }
  251. status = ParserEngineMessage(engines_json_map, scheduler_id_temp, engine_conf_map);
  252. if (status != SUCCESS) {
  253. GELOGE(FAILED, "Parser engines messages failed");
  254. return FAILED;
  255. }
  256. scheduler_conf.name = scheduler_utils_json[i][kName];
  257. scheduler_conf.ex_attrs = scheduler_utils_json[i][kExAttrs];
  258. scheduler_conf.cal_engines = engine_conf_map;
  259. auto it = schedulers_.find(scheduler_id_temp);
  260. if (it != schedulers_.end()) {
  261. GELOGE(FAILED, "There are the same scheduler ts %s in the json file", scheduler_id_temp.c_str());
  262. return FAILED;
  263. }
  264. schedulers_.emplace(scheduler_id_temp, scheduler_conf);
  265. }
  266. } catch (const nlohmann::detail::type_error &e) {
  267. GELOGE(FAILED, "Parser json file failed");
  268. return FAILED;
  269. }
  270. GELOGI("Parser json file SUCCESS");
  271. return SUCCESS;
  272. }
  273. Status DNNEngineManager::ParserEngineMessage(const json engines_json, const std::string &scheduler_mark,
  274. std::map<std::string, EngineConfPtr> &engines) {
  275. GELOGI("Begin to parser engine massage");
  276. if (engines_json.is_null()) {
  277. GELOGE(FAILED, "The message of cal_engines is null");
  278. return FAILED;
  279. }
  280. try {
  281. if (engines_json.is_array()) {
  282. for (size_t i = 0; i < engines_json.size(); i++) {
  283. nlohmann::json engines_elems = engines_json[i];
  284. EngineConfPtr engine_conf_ptr = MakeShared<EngineConf>();
  285. if (engine_conf_ptr == nullptr) {
  286. return FAILED;
  287. }
  288. std::string engine_id = engines_elems[kId];
  289. if (!engine_id.empty()) {
  290. engine_conf_ptr->id = engine_id;
  291. } else {
  292. GELOGE(FAILED, "engineID is null");
  293. return FAILED;
  294. }
  295. if (engines_elems.find(kName) != engines_elems.end()) {
  296. engine_conf_ptr->name = engines_elems[kName];
  297. } else {
  298. GELOGW("The engine %s name is null", engine_id.c_str());
  299. }
  300. if (engines_elems.find(kIndependent) != engines_elems.end()) {
  301. engine_conf_ptr->independent = engines_elems[kIndependent];
  302. }
  303. if (engines_elems.find(kAttch) != engines_elems.end()) {
  304. engine_conf_ptr->attach = engines_elems[kAttch];
  305. }
  306. if (engines_elems.find(kSkipAssignStream) != engines_elems.end()) {
  307. engine_conf_ptr->skip_assign_stream = engines_elems[kSkipAssignStream];
  308. }
  309. engine_conf_ptr->scheduler_id = scheduler_mark;
  310. auto it = engines.find(engine_id);
  311. if (it != engines.end()) {
  312. GELOGE(FAILED, "There are the same engine %s message in the json file", engine_id.c_str());
  313. return FAILED;
  314. }
  315. engines.emplace(engine_id, engine_conf_ptr);
  316. }
  317. } else {
  318. GELOGE(FAILED, "The message of cal_engines is not array in the json file");
  319. return FAILED;
  320. }
  321. } catch (const json::exception &e) {
  322. GELOGE(FAILED, "construct json content failed");
  323. return FAILED;
  324. }
  325. GELOGI("Parser engine massage success");
  326. return SUCCESS;
  327. }
  328. Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle handle) {
  329. GELOGI("Begin to read json file");
  330. if (file_path.empty()) {
  331. GELOGE(FAILED, "Json path %s is not valid", file_path.c_str());
  332. return FAILED;
  333. }
  334. nlohmann::json *json_file = reinterpret_cast<nlohmann::json *>(handle);
  335. if (json_file == nullptr) {
  336. GELOGE(FAILED, "JsonFile is nullptr");
  337. return FAILED;
  338. }
  339. const char *file = file_path.data();
  340. if ((access(file, F_OK)) == -1) {
  341. if (engines_map_.size() != 0) {
  342. GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno));
  343. return FAILED;
  344. } else {
  345. GELOGW("The json file %s is not needed.", file_path.c_str());
  346. return SUCCESS;
  347. }
  348. }
  349. std::ifstream ifs(file_path);
  350. if (!ifs.is_open()) {
  351. GELOGE(FAILED, "Open json file %s failed", file_path.c_str());
  352. return FAILED;
  353. }
  354. ifs >> *json_file;
  355. ifs.close();
  356. GELOGI("Read json file success");
  357. return SUCCESS;
  358. }
  359. Status DNNEngineManager::CheckJsonFile() {
  360. GELOGI("Begin to check json file");
  361. for (auto &it : engines_map_) {
  362. std::string engine_name = it.first;
  363. int count = 0;
  364. for (auto &iter : schedulers_) {
  365. auto engine_map = iter.second.cal_engines;
  366. auto iter_engine_name = engine_map.find(engine_name);
  367. if (iter_engine_name != engine_map.end()) {
  368. count++;
  369. }
  370. }
  371. if (count == 0) {
  372. GELOGE(FAILED, "The engine message %s is not found in the json file", engine_name.c_str());
  373. return FAILED;
  374. }
  375. if (count > 1) {
  376. GELOGE(FAILED, "The same engine message %s is existed in the json file", engine_name.c_str());
  377. return FAILED;
  378. }
  379. }
  380. GELOGI("Check json file success");
  381. return SUCCESS;
  382. }
  383. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。