You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

model_cache_helper.cc 65 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <fcntl.h>
  17. #include <unistd.h>
  18. #include <climits>
  19. #include <cstdio>
  20. #include <fstream>
  21. #include <functional>
  22. #include "common/ge/ge_util.h"
  23. #include "common/helper/model_cache_helper.h"
  24. #include "common/types.h"
  25. #include "framework/common/debug/ge_log.h"
  26. #include "framework/common/ge_types.h"
  27. #include "framework/common/helper/model_helper.h"
  28. #include "framework/common/util.h"
  29. #include "graph/detail/attributes_holder.h"
  30. #include "graph/detail/model_serialize_imp.h"
  31. #include "graph/load/new_model_manager/davinci_model_parser.h"
  32. #include "graph/model.h"
  33. #include "graph/utils/graph_utils.h"
  34. #include "graph/utils/tensor_utils.h"
  35. #include "init/gelib.h"
  36. #include "proto/ge_ir.pb.h"
  37. using namespace std;
  namespace {
  // Name passed to GetOpsKernelInfoStore() when looking up the store used for recompiling ops.
  constexpr const char *kTbeKernelInfoStoreName = "AIcoreEngine";
  // Placeholder graph name set on the compute graph while its hash is being computed.
  constexpr const char *kGraphName = "temp_name";

  // Keys of json
  constexpr const char *kNodeNum = "nodeNum";
  constexpr const char *kEdgeNum = "edgeNum";
  constexpr const char *kGraphHash = "graphHash";
  constexpr const char *kNodeHash = "nodeHash";
  constexpr const char *kHash = "hash";
  constexpr const char *kSessionId = "sessionId";
  constexpr const char *kDeviceId = "deviceId";
  constexpr const char *kJobId = "jobId";
  constexpr const char *kGraphMemMaxSize = "graphMemMaxSize";
  constexpr const char *kVarMemMaxSize = "varMemMaxSize";
  constexpr const char *kVarMemLogicBase = "varMemLogicBase";
  constexpr const char *kUseMaxMemSize = "useMaxMemSize";
  constexpr const char *kMemResourceMap = "memResourceMap";
  constexpr const char *kMemType = "memType";
  constexpr const char *kTotalSize = "totalSize";
  constexpr const char *kVarMemSize = "varMemSize";
  constexpr const char *kVarResource = "varResource";
  constexpr const char *kVarAddrMgrMap = "varAddrMgrMap";
  constexpr const char *kName = "name";
  constexpr const char *kAddress = "address";
  constexpr const char *kOffset = "offset";
  constexpr const char *kMemoryType = "memoryType";
  constexpr const char *kTensorDesc = "tensorDesc";
  constexpr const char *kDataType = "dataType";
  constexpr const char *kShape = "shape";
  constexpr const char *kLayout = "layout";
  constexpr const char *kOriginDataType = "originDataType";
  constexpr const char *kOriginShape = "originShape";
  constexpr const char *kOriginLayout = "originLayout";
  constexpr const char *kRealDimCnt = "realDimCnt";
  constexpr const char *kCurVarTensorDescMap = "curVarTensorDescMap";
  constexpr const char *kTransRoads = "transRoads";
  constexpr const char *kTransRoad = "transRoad";
  constexpr const char *kNodeType = "nodeType";
  constexpr const char *kInputTensorDesc = "inputTensorDesc";
  constexpr const char *kOutputTensorDesc = "outputTensorDesc";
  constexpr const char *kChangedGraphId = "changedGraphId";
  constexpr const char *kAllocatedGraphId = "allocatedGraphId";
  constexpr const char *kGraphId = "graphId";
  constexpr const char *kVarBroadcastInfo = "varBroadcastInfo";
  constexpr const char *kBroadcastName = "broadcastName";
  constexpr const char *kIdx = "idx";
  constexpr const char *kInputOffset = "inputOffset";
  constexpr const char *kInputSize = "inputSize";
  constexpr const char *kOutputOffset = "outputOffset";
  constexpr const char *kOutputSize = "outputSize";

  // Suffix of cache files
  constexpr const char *kBeforeVarManagerSuffix = "_before_build_var_manager.json";
  constexpr const char *kAfterVarManagerSuffix = "_after_build_var_manager.json";
  constexpr const char *kManifestSuffix = ".manifest";
  constexpr const char *kOmSuffix = ".om";
  }  // namespace
  94. namespace ge {
  95. map<uint32_t, uint32_t> ModelCacheHelper::graph_id_run_times_;
  96. ModelCacheHelper::ModelCacheHelper(uint64_t session_id, uint32_t graph_id, ComputeGraphPtr &compute_graph)
  97. : session_id_(session_id),
  98. graph_id_(graph_id),
  99. compute_graph_(compute_graph),
  100. is_cache_path_valid_for_output(false) {
  101. if (graph_id_run_times_.count(graph_id) == 0) {
  102. graph_id_run_times_[graph_id] = 1;
  103. } else {
  104. graph_id_run_times_[graph_id] = graph_id_run_times_[graph_id] + 1;
  105. }
  106. for (const auto &node : compute_graph_->GetDirectNode()) {
  107. bool is_variable = (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) ||
  108. (node->GetType() == VARHANDLEOP) || (node->GetType() == CONSTANTOP);
  109. if (!is_variable) {
  110. continue;
  111. }
  112. var_names_.insert(node->GetName());
  113. }
  114. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  115. if (instance_ptr != nullptr && instance_ptr->IsIncreBuild()) {
  116. std::string cache_path = instance_ptr->GetIncreBuildCachePath();
  117. GELOGD("Incre build path conf: %s", cache_path.c_str());
  118. string fake_file_path = cache_path + to_string(graph_id_) + kManifestSuffix;
  119. if (CheckOutputPathValid(fake_file_path)) {
  120. is_cache_path_valid_for_output = true;
  121. } else {
  122. GELOGW("Invalid cache path for output.");
  123. }
  124. std::string real_cache_path = RealPath(cache_path.c_str());
  125. if (real_cache_path.empty()) {
  126. GELOGW("Invalid incre build cache path conf: %s", cache_path.c_str());
  127. return;
  128. }
  129. cache_path_ = real_cache_path + '/';
  130. GELOGD("Try to use incre build cache path: %s", cache_path_.c_str());
  131. }
  132. }
  133. ModelCacheHelper::~ModelCacheHelper() { var_names_.clear(); }
  134. bool ModelCacheHelper::IsModelCacheHit() const {
  135. CacheInfo cache_info;
  136. if (GetCacheInfo(cache_info) != SUCCESS) {
  137. GELOGI("Get cache info of graph id[%u] failed.", graph_id_);
  138. return false;
  139. }
  140. // Check number of nodes and edges first.
  141. if (cache_info.node_num != compute_graph_->GetDirectNodesSize()) {
  142. GELOGI("Graph id[%u] cache miss: the node number of the graph does not match the cache info.", graph_id_);
  143. return false;
  144. }
  145. size_t edge_num = 0;
  146. for (const auto &node : compute_graph_->GetDirectNode()) {
  147. for (const auto &anchor : node->GetAllInAnchors()) {
  148. edge_num += anchor->GetPeerAnchors().size();
  149. }
  150. }
  151. if (cache_info.edge_num != edge_num) {
  152. GELOGI("Graph id[%u] cache miss: the edge number of the graph does not match the cache info.", graph_id_);
  153. return false;
  154. }
  155. size_t compute_graph_hash;
  156. auto ret = GetComputeGraphHash(compute_graph_hash);
  157. if (ret != SUCCESS || cache_info.graph_hash != compute_graph_hash) {
  158. GELOGI("Graph id[%u] cache miss: the hash code of the graph does not match the cache info.", graph_id_);
  159. return false;
  160. }
  161. if (!IsNodeHashSameAsCache(cache_info.nodes_hash)) {
  162. GELOGI("Graph id[%u] cache miss: the hash code of node does not match the cache info.", graph_id_);
  163. return false;
  164. }
  165. string var_manager_cache =
  166. to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kBeforeVarManagerSuffix;
  167. Json var_manager_json;
  168. if (LoadJsonFromFile(var_manager_cache, var_manager_json) != SUCCESS) {
  169. GELOGW("Fail to load json from cache file: %s", var_manager_cache.c_str());
  170. return false;
  171. }
  172. if (!IsVarManagerSameAsCache(var_manager_json)) {
  173. GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_);
  174. return false;
  175. }
  176. GELOGI("Graph id[%u] cache hit.", graph_id_);
  177. return true;
  178. }
  179. Status ModelCacheHelper::RefreshComputeGraph(const ComputeGraphPtr &compute_graph) {
  180. if (compute_graph->IsValid()) {
  181. compute_graph_ = compute_graph;
  182. var_names_.clear();
  183. for (const auto &node : compute_graph_->GetDirectNode()) {
  184. bool is_variable = (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) ||
  185. (node->GetType() == VARHANDLEOP) || (node->GetType() == CONSTANTOP);
  186. if (!is_variable) {
  187. continue;
  188. }
  189. var_names_.insert(node->GetName());
  190. }
  191. return SUCCESS;
  192. } else {
  193. GELOGW("Invalid compute graph.");
  194. return FAILED;
  195. }
  196. }
  197. Status ModelCacheHelper::ClearCache(uint32_t graph_id) const {
  198. if (!is_cache_path_valid_for_output) {
  199. GELOGW("Invalid cache path.");
  200. return SUCCESS;
  201. }
  202. string manifest_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  203. string manifest_file_path = RealPath(manifest_file.c_str());
  204. int ret;
  205. if (!manifest_file_path.empty()) {
  206. ret = remove(manifest_file_path.c_str());
  207. // If remove file failed, print the warning log
  208. if (ret != 0) {
  209. GELOGW("Clear cache [%s] failed.", manifest_file_path.c_str());
  210. }
  211. }
  212. string before_var_manager_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  213. string before_var_manager_file_path = RealPath(before_var_manager_file.c_str());
  214. if (!before_var_manager_file_path.empty()) {
  215. ret = remove(before_var_manager_file_path.c_str());
  216. if (ret != 0) {
  217. GELOGW("Clear cache [%s] failed.", before_var_manager_file_path.c_str());
  218. }
  219. }
  220. string after_var_manager_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  221. string after_var_manager_file_path = RealPath(after_var_manager_file.c_str());
  222. if (!after_var_manager_file_path.empty()) {
  223. ret = remove(after_var_manager_file_path.c_str());
  224. if (ret != 0) {
  225. GELOGW("Clear cache [%s] failed.", after_var_manager_file_path.c_str());
  226. }
  227. }
  228. string om_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  229. string om_file_path = RealPath(om_file.c_str());
  230. if (!om_file_path.empty()) {
  231. ret = remove(om_file_path.c_str());
  232. if (ret != 0) {
  233. GELOGW("Clear cache [%s] failed.", om_file_path.c_str());
  234. }
  235. }
  236. return SUCCESS;
  237. }
  238. Status ModelCacheHelper::RecoverVarManagerFromCache() const {
  239. string var_manager_cache =
  240. to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kAfterVarManagerSuffix;
  241. Json var_manager_json;
  242. if (LoadJsonFromFile(var_manager_cache, var_manager_json) != SUCCESS) {
  243. GELOGW("Fail to load json from cache file: %s", var_manager_cache.c_str());
  244. return FAILED;
  245. }
  246. Json mem_resource_json = move(var_manager_json[kMemResourceMap]);
  247. auto ret = RecoverMemResource(mem_resource_json);
  248. if (ret != SUCCESS) {
  249. GELOGW("Recover VarManager from cache failed.[MemResource]");
  250. return FAILED;
  251. }
  252. Json var_resource_json = move(var_manager_json[kVarResource]);
  253. ret = RecoverAllocatedGraphId(var_resource_json[kAllocatedGraphId]);
  254. if (ret != SUCCESS) {
  255. GELOGW("Recover VarManager from cache failed.[AllocatedGraphId]");
  256. return FAILED;
  257. }
  258. ret = RecoverChangedGraphId(var_resource_json[kChangedGraphId]);
  259. if (ret != SUCCESS) {
  260. GELOGW("Recover VarManager from cache failed.[ChangedGraphId]");
  261. return FAILED;
  262. }
  263. ret = RecoverBroadcastInfo(var_resource_json[kVarBroadcastInfo]);
  264. if (ret != SUCCESS) {
  265. GELOGW("Recover VarManager from cache failed.[VarBroadcastInfo]");
  266. return FAILED;
  267. }
  268. ret = RecoverVarAddrAndTensorDesc(var_resource_json[kVarAddrMgrMap]);
  269. if (ret != SUCCESS) {
  270. GELOGW("Recover VarManager from cache failed.[VarAddrMgrMap & CurVarTensorDesc]");
  271. return FAILED;
  272. }
  273. ret = RecoverTransRoads(var_resource_json[kTransRoads]);
  274. if (ret != SUCCESS) {
  275. GELOGW("Recover VarManager from cache failed.[TransRoads]");
  276. return FAILED;
  277. }
  278. GELOGI("Recover VarManager from cache[%s] success.", cache_path_.c_str());
  279. return SUCCESS;
  280. }
  281. Status ModelCacheHelper::GetNodesNeedRecompile(ComputeGraphPtr &graph, vector<NodePtr> &nodes) {
  282. std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  283. if (instance == nullptr || !instance->InitFlag()) {
  284. GELOGW("RecompileNodes failed.");
  285. return ge::GE_CLI_GE_NOT_INITIALIZED;
  286. }
  287. // Collect aicore ops for recompile
  288. for (auto &node : graph->GetDirectNode()) {
  289. if (node == nullptr) {
  290. continue;
  291. }
  292. auto op_desc = node->GetOpDesc();
  293. if (op_desc == nullptr) {
  294. continue;
  295. }
  296. // Get op kernel lib name
  297. string kernel_lib_name = op_desc->GetOpKernelLibName();
  298. if (kernel_lib_name.empty()) {
  299. // reset op kernel lib
  300. (void)instance->DNNEngineManagerObj().GetDNNEngineName(node);
  301. kernel_lib_name = op_desc->GetOpKernelLibName();
  302. if (kernel_lib_name.empty()) {
  303. GELOGW("Get node:%s, type:%s kernel lib failed.", node->GetName().c_str(), op_desc->GetType().c_str());
  304. continue;
  305. }
  306. }
  307. }
  308. return SUCCESS;
  309. }
  310. Status ModelCacheHelper::RecompileNodes(GeModelPtr &ge_model) {
  311. std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  312. if (instance == nullptr || !instance->InitFlag()) {
  313. GELOGW("RecompileNodes failed.");
  314. return ge::GE_CLI_GE_NOT_INITIALIZED;
  315. }
  316. // Get aicore ops kernel info store.
  317. OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kTbeKernelInfoStoreName);
  318. if (kernel_info == nullptr) {
  319. GELOGW("Get %s ops kernel info store failed", kTbeKernelInfoStoreName);
  320. return INTERNAL_ERROR;
  321. }
  322. auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  323. vector<NodePtr> node_vec;
  324. auto ret = GetNodesNeedRecompile(compute_graph, node_vec);
  325. GE_CHK_BOOL_EXEC_WARN(ret == ge::SUCCESS, return ret, "Get nodes need recompiling failed");
  326. // Recompile aicore ops
  327. ret = kernel_info->CompileOp(node_vec);
  328. GE_CHK_BOOL_EXEC_WARN(ret == ge::SUCCESS, return ret, "Recompile op failed");
  329. const TBEKernelStore &tbekernel_store = ge_model->GetTBEKernelStore();
  330. TBEKernelStore tbe_kernel_store;
  331. for (const ge::NodePtr &n : compute_graph->GetDirectNode()) {
  332. auto node_op_desc = n->GetOpDesc();
  333. GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
  334. TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  335. if (tbe_kernel == nullptr) {
  336. // Load tbe kernel from tbe_kernel_store to op if op was not recompiled
  337. auto op_desc = n->GetOpDesc();
  338. tbekernel_store.LoadTBEKernelBinToOpDesc(op_desc);
  339. GELOGD("LoadOmModelFromCache: Load tbe kernel bin to op desc[%s].", op_desc->GetName().c_str());
  340. }
  341. tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  342. GE_IF_BOOL_EXEC(tbe_kernel == nullptr, continue);
  343. // Refresh tbe kernel in tbe_kernel_store
  344. tbe_kernel_store.AddTBEKernel(tbe_kernel);
  345. GELOGD("Add tbe kernel bin %s", tbe_kernel->GetName().c_str());
  346. }
  347. GE_CHK_BOOL_EXEC_WARN(tbe_kernel_store.Build(), return FAILED, "TBE Kernels store build failed!");
  348. ge_model->SetTBEKernelStore(tbe_kernel_store);
  349. return SUCCESS;
  350. }
  351. Status ModelCacheHelper::GetNodesHash(map<std::string, size_t> &hash_map) const {
  352. vector<NodePtr> nodes;
  353. GraphUtils::TopologicalSortingByName(compute_graph_, nodes);
  354. ModelSerializeImp model_serialize_imp;
  355. std::hash<string> node_hash;
  356. for (const auto &node : nodes) {
  357. if (node == nullptr) {
  358. continue;
  359. }
  360. proto::OpDef op_def;
  361. bool is_framework_op = (node->GetType() == FRAMEWORKOP);
  362. int32_t framework_type = 0;
  363. if (is_framework_op) {
  364. AttrUtils::GetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, framework_type);
  365. AttrUtils::SetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, 0);
  366. }
  367. bool ret = model_serialize_imp.SerializeNode(node, &op_def, is_framework_op);
  368. op_def.set_id(0); // Id of op is not stable because of parallel parsing
  369. // Clear weights attr in constant.
  370. auto attr = op_def.mutable_attr();
  371. if (op_def.type() == CONSTANT || op_def.type() == CONSTANTOP) {
  372. attr->erase(ATTR_NAME_WEIGHTS);
  373. }
  374. if (is_framework_op) {
  375. AttrUtils::SetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, framework_type);
  376. }
  377. if (!ret) {
  378. GELOGW("Fail to serialize node[%s].", node->GetName().c_str());
  379. return INTERNAL_ERROR;
  380. }
  381. string prototxt;
  382. ret = google::protobuf::TextFormat::PrintToString(op_def, &prototxt);
  383. if (!ret) {
  384. GELOGW("Print OpDef to string failed.");
  385. hash_map.clear();
  386. return INTERNAL_ERROR;
  387. }
  388. size_t hash_code = node_hash(prototxt);
  389. hash_map[node->GetName()] = hash_code;
  390. }
  391. return SUCCESS;
  392. }
  393. Status ModelCacheHelper::GetComputeGraphHash(size_t &hash) const {
  394. proto::GraphDef graph_proto;
  395. ModelSerializeImp model_serialize_imp;
  396. // The name of compute graph may be generated randomly, so replace it temporarily.
  397. const string origin_name = compute_graph_->GetName();
  398. compute_graph_->SetName(kGraphName);
  399. bool serialize_ret = model_serialize_imp.SerializeGraph(compute_graph_, &graph_proto);
  400. graph_proto.clear_op();
  401. if (!serialize_ret) {
  402. GELOGW("Serialize graph failed.");
  403. hash = 0;
  404. return INTERNAL_ERROR;
  405. }
  406. compute_graph_->SetName(origin_name);
  407. // Generate proto text of GraphDef
  408. string prototxt;
  409. bool print_ret = google::protobuf::TextFormat::PrintToString(graph_proto, &prototxt);
  410. if (!print_ret) {
  411. GELOGW("Print GraphDef to string failed.");
  412. hash = 0;
  413. return INTERNAL_ERROR;
  414. }
  415. // Get the hash code of proto text
  416. std::hash<string> graph_hash;
  417. hash = graph_hash(prototxt);
  418. return SUCCESS;
  419. }
  420. Status ModelCacheHelper::SaveJsonToFile(const string &file_name, const Json &json) const {
  421. if (!is_cache_path_valid_for_output) {
  422. GELOGW("Invalid cache path.");
  423. return PARAM_INVALID;
  424. }
  425. // Check whether the manifest exists, if not, create it.
  426. string real_path = RealPath(cache_path_.c_str());
  427. if (real_path.empty()) {
  428. GELOGW("File path is invalid. please check cache path: %s", cache_path_.c_str());
  429. return FAILED;
  430. }
  431. const string path = cache_path_ + file_name;
  432. const int FILE_AUTHORITY = 0600;
  433. int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY);
  434. if (fd < 0) {
  435. GELOGW("Fail to open the file: %s.", path.c_str());
  436. return INTERNAL_ERROR;
  437. }
  438. if (close(fd) != 0) {
  439. GELOGW("Fail to close the file: %s.", path.c_str());
  440. return INTERNAL_ERROR;
  441. }
  442. // Write json into cache file
  443. ofstream ofs;
  444. ofs.open(path);
  445. if (!ofs.is_open()) {
  446. GELOGW("Fail to open the file: %s.", path.c_str());
  447. return INTERNAL_ERROR;
  448. }
  449. ofs << json << std::endl;
  450. ofs.close();
  451. return SUCCESS;
  452. }
  453. Status ModelCacheHelper::LoadJsonFromFile(const string &file_name, Json &json) const {
  454. if (!json.is_null()) {
  455. GELOGW("Input param json type should be null.");
  456. return PARAM_INVALID;
  457. }
  458. string real_path = RealPath(cache_path_.c_str());
  459. if (real_path.empty()) {
  460. GELOGW("File path is invalid. please check cache path: %s", cache_path_.c_str());
  461. return FAILED;
  462. }
  463. const string path = cache_path_ + file_name;
  464. if (!CheckInputPathValid(path)) {
  465. GELOGW("Invalid cache path for input:%s.", path.c_str());
  466. return FAILED;
  467. }
  468. string cache_real_path = RealPath(path.c_str());
  469. if (cache_real_path.empty()) {
  470. GELOGI("File[%s] is not found.", path.c_str());
  471. return FAILED;
  472. }
  473. // Read json from cache file
  474. ifstream ifs;
  475. ifs.open(path);
  476. if (!ifs.is_open()) {
  477. GELOGW("Fail to open the file: %s.", path.c_str());
  478. return INTERNAL_ERROR;
  479. }
  480. ifs >> json;
  481. if (!json.is_object()) {
  482. GELOGW("Fail to load the json file: %s.", path.c_str());
  483. return INTERNAL_ERROR;
  484. }
  485. return SUCCESS;
  486. }
  487. Status ModelCacheHelper::SaveCacheInfoToCache() const {
  488. // Generate cache json
  489. // example: {"edgeNum":6,"nodeNum":7,"graphCache":134714827475991356}
  490. Json cache_json;
  491. try {
  492. cache_json[kNodeNum] = compute_graph_->GetDirectNodesSize();
  493. size_t edge_num = 0;
  494. for (const auto &node : compute_graph_->GetDirectNode()) {
  495. for (const auto &anchor : node->GetAllInAnchors()) {
  496. edge_num += anchor->GetPeerAnchors().size();
  497. }
  498. }
  499. cache_json[kEdgeNum] = edge_num;
  500. size_t hash = 0;
  501. auto ret = GetComputeGraphHash(hash);
  502. if (ret != SUCCESS) {
  503. GELOGW("Error occur when generate graph hash code.");
  504. return ret;
  505. }
  506. cache_json[kGraphHash] = hash;
  507. Json nodes_hash_json;
  508. ret = GetNodesHashMapJson(nodes_hash_json);
  509. if (ret != SUCCESS) {
  510. GELOGW("Error occur when generate nodes hash code.");
  511. return ret;
  512. }
  513. cache_json[kNodeHash] = nodes_hash_json;
  514. } catch (const std::exception &e) {
  515. GELOGW("Fail to generate cache info json. Error message: %s", e.what());
  516. return INTERNAL_ERROR;
  517. }
  518. string cache_manifest = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kManifestSuffix;
  519. auto ret = SaveJsonToFile(cache_manifest, cache_json);
  520. if (ret != SUCCESS) {
  521. GELOGW("Fail to save cache info to json file, path: %s.", cache_path_.c_str());
  522. return ret;
  523. }
  524. return SUCCESS;
  525. }
  526. Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const {
  527. string cache_manifest = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kManifestSuffix;
  528. Json cache_json;
  529. if (LoadJsonFromFile(cache_manifest, cache_json) != SUCCESS) {
  530. GELOGW("Fail to load json from cache file: %s", cache_manifest.c_str());
  531. return INTERNAL_ERROR;
  532. }
  533. if (!cache_json.is_object()) {
  534. GELOGW("Manifest should be a json object");
  535. return INTERNAL_ERROR;
  536. }
  537. try {
  538. cache_info.node_num = cache_json[kNodeNum];
  539. cache_info.edge_num = cache_json[kEdgeNum];
  540. cache_info.graph_hash = cache_json[kGraphHash];
  541. Json nodes_hash_json = cache_json[kNodeHash];
  542. if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) {
  543. GELOGW("Nodes hash in cache should be null or array.");
  544. return FAILED;
  545. }
  546. for (const auto &iter : nodes_hash_json) {
  547. cache_info.nodes_hash[iter[kName].get<std::string>()] = iter[kHash].get<size_t>();
  548. }
  549. } catch (const std::exception &e) {
  550. GELOGW("Fail to get info from json file. Error message: %s", e.what());
  551. return INTERNAL_ERROR;
  552. }
  553. return SUCCESS;
  554. }
  555. bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const {
  556. if (!(json.is_null() || json.is_array())) {
  557. GELOGW("Input param json type should be null or array.");
  558. return false;
  559. }
  560. // Compare allocated graph id info between json and VarManager
  561. std::unordered_map<std::string, uint32_t> allocated_graph_id;
  562. auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  563. if (ret != SUCCESS) {
  564. GELOGW("Fail to parse AllocatedGraphId from Json.");
  565. return false;
  566. }
  567. for (const auto &iter : allocated_graph_id) {
  568. uint32_t graph_id = 0;
  569. ret = VarManager::Instance(session_id_)->GetAllocatedGraphId(iter.first, graph_id);
  570. if (ret != SUCCESS) {
  571. GELOGW("Fail to find allocated graph id of var[%s].", iter.first.c_str());
  572. return false;
  573. }
  574. if (graph_id != iter.second) {
  575. GELOGW("The allocated graph id of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  576. return false;
  577. }
  578. }
  579. return true;
  580. }
  581. bool ModelCacheHelper::IsNodeHashSameAsCache(const map<std::string, size_t> &hash_map) const {
  582. map<std::string, size_t> cur_hash_map;
  583. GetNodesHash(cur_hash_map);
  584. if (hash_map.size() != cur_hash_map.size()) {
  585. GELOGI("The number of hash code is different from cache info.");
  586. return false;
  587. }
  588. for (const auto &iter : cur_hash_map) {
  589. if (hash_map.count(iter.first) == 0) {
  590. GELOGI("Node[%s] is not found in cache info.", iter.first.c_str());
  591. return false;
  592. }
  593. if (hash_map.at(iter.first) != iter.second) {
  594. GELOGI("The hash code of node[%s] is different from cache info.", iter.first.c_str());
  595. return false;
  596. }
  597. }
  598. return true;
  599. }
  600. bool ModelCacheHelper::IsMemResourceSameAsCache(Json &json) const {
  601. if (!(json.is_null() || json.is_array())) {
  602. GELOGW("Input param json type should be null or array.");
  603. return false;
  604. }
  605. // Compare var mem size info between json and VarManager
  606. std::map<rtMemType_t, int64_t> var_mem_size;
  607. auto ret = ParseMemResourceFromJson(json, var_mem_size);
  608. if (ret != SUCCESS) {
  609. GELOGW("Fail to parse MemResource from Json.");
  610. return false;
  611. }
  612. for (const auto &iter : var_mem_size) {
  613. int64_t mem_size = VarManager::Instance(session_id_)->GetVarMemSize(iter.first);
  614. if (mem_size != iter.second) {
  615. GELOGW("The var mem size of memory_type[%u] in cache is different from VarManager.", iter.first);
  616. return false;
  617. }
  618. }
  619. return true;
  620. }
  621. bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const {
  622. if (!(json.is_null() || json.is_array())) {
  623. GELOGW("Input param json type should be null or array.");
  624. return false;
  625. }
  626. // Compare variable changed graph id info between json and VarManager
  627. std::unordered_map<std::string, uint32_t> changed_graph_id;
  628. auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  629. if (ret != SUCCESS) {
  630. GELOGW("Fail to parse ChangedGraphId from Json.");
  631. return false;
  632. }
  633. for (const auto &iter : changed_graph_id) {
  634. uint32_t graph_id = 0;
  635. ret = VarManager::Instance(session_id_)->GetChangedGraphId(iter.first, graph_id);
  636. if (ret != SUCCESS) {
  637. GELOGW("Fail to find changed graph id of var[%s].", iter.first.c_str());
  638. return false;
  639. }
  640. if (graph_id != iter.second) {
  641. GELOGW("The changed graph id of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  642. return false;
  643. }
  644. }
  645. return true;
  646. }
  647. bool ModelCacheHelper::IsCurVarTensorDescSameAsCache(Json &json) const {
  648. if (!(json.is_null() || json.is_array())) {
  649. GELOGW("Input param json type should be null or array.");
  650. return false;
  651. }
  652. // Compare variable tensor desc info between json and VarManager
  653. std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc;
  654. auto ret = ParseCurVarTensorDescMapFromJson(json, cur_var_tensor_desc);
  655. if (ret != SUCCESS) {
  656. GELOGW("Fail to parse CurVarTensorDesc from Json.");
  657. return false;
  658. }
  659. for (const auto &iter : cur_var_tensor_desc) {
  660. GeTensorDesc tensor_desc;
  661. ret = VarManager::Instance(session_id_)->GetCurVarDesc(iter.first, tensor_desc);
  662. if (ret != SUCCESS) {
  663. GELOGW("Fail to find tensor desc of var[%s].", iter.first.c_str());
  664. return false;
  665. }
  666. uint32_t l_real_dim_cnt = 0;
  667. uint32_t r_real_dim_cnt = 0;
  668. TensorUtils::GetRealDimCnt(tensor_desc, l_real_dim_cnt);
  669. TensorUtils::GetRealDimCnt(iter.second, r_real_dim_cnt);
  670. if ((tensor_desc.GetDataType() != iter.second.GetDataType()) ||
  671. (tensor_desc.GetOriginDataType() != iter.second.GetOriginDataType()) ||
  672. (tensor_desc.GetFormat() != iter.second.GetFormat()) ||
  673. (tensor_desc.GetOriginFormat() != iter.second.GetOriginFormat()) ||
  674. (tensor_desc.GetShape().ToString() != iter.second.GetShape().ToString()) ||
  675. (tensor_desc.GetOriginShape().ToString() != iter.second.GetOriginShape().ToString()) ||
  676. (l_real_dim_cnt != r_real_dim_cnt)) {
  677. GELOGW("The var tensor desc of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  678. return false;
  679. }
  680. }
  681. return true;
  682. }
  683. bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const {
  684. if (!(json.is_null() || json.is_array())) {
  685. GELOGW("Input param json type should be null or array.");
  686. return false;
  687. }
  688. // Compare variable address info between json and VarManager
  689. std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  690. std::unordered_set<uint64_t> var_offset_set;
  691. auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  692. if (ret != SUCCESS) {
  693. GELOGW("Fail to parse VarAddrMgrMap from Json.");
  694. return false;
  695. }
  696. for (const auto &iter : var_addr_mgr_vector) {
  697. uint8_t *dev_ptr = nullptr;
  698. rtMemType_t memory_type;
  699. ret = VarManager::Instance(session_id_)->GetVarAddr(iter.first, iter.second.tensor_desc, &dev_ptr, memory_type);
  700. if (ret != SUCCESS) {
  701. GELOGW("Fail to find tensor desc of var[%s].", iter.first.c_str());
  702. return false;
  703. }
  704. // Compare memory type and logic address
  705. if (iter.second.memory_type != memory_type || iter.second.address != dev_ptr) {
  706. GELOGW("The VarAddrMgr of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  707. return false;
  708. }
  709. }
  710. return true;
  711. }
  712. bool ModelCacheHelper::IsBroadcastInfoSameAsCache(Json &json) const {
  713. if (!(json.is_null() || json.is_array())) {
  714. GELOGW("Input param json type should be null or array.");
  715. return false;
  716. }
  717. // Compare broadcast info between json and VarManager
  718. std::unordered_map<std::string, VarBroadCastInfo> var_broadcast_info;
  719. auto ret = ParseBroadcastInfoFromJson(json, var_broadcast_info);
  720. if (ret != SUCCESS) {
  721. GELOGW("Fail to parse BroadcastInfo from Json.");
  722. return false;
  723. }
  724. for (const auto &iter : var_broadcast_info) {
  725. VarBroadCastInfo broadcast_info;
  726. if (VarManager::Instance(session_id_)->GetBroadCastInfo(graph_id_, iter.first, broadcast_info) != SUCCESS) {
  727. GELOGW("Fail to find broadcast info of var[%s].", iter.first.c_str());
  728. return false;
  729. }
  730. if (iter.second.var_name != broadcast_info.var_name || iter.second.idx != broadcast_info.idx ||
  731. iter.second.input_size != broadcast_info.input_size ||
  732. iter.second.input_offset != broadcast_info.input_offset ||
  733. iter.second.output_size != broadcast_info.output_size ||
  734. iter.second.output_offset != broadcast_info.output_offset) {
  735. GELOGW("The BroadcastInfo of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  736. return false;
  737. }
  738. }
  739. return true;
  740. }
  741. bool ModelCacheHelper::IsTransRoadsSameAsCache(Json &json) const {
  742. if (!(json.is_null() || json.is_array())) {
  743. GELOGW("Input param json type should be null or array.");
  744. return false;
  745. }
  746. // Compare trans road between json and VarManager
  747. std::unordered_map<std::string, std::vector<TransNodeInfo>> trans_roads;
  748. auto ret = ParseTransRoadsFromJson(json, trans_roads);
  749. if (ret != SUCCESS) {
  750. GELOGW("Fail to parse TransRoads from Json.");
  751. return false;
  752. }
  753. for (const auto &iter : trans_roads) {
  754. VarTransRoad *trans_road;
  755. trans_road = VarManager::Instance(session_id_)->GetTransRoad(iter.first);
  756. if (trans_road == nullptr) {
  757. GELOGW("Fail to find trans road of var[%s].", iter.first.c_str());
  758. return false;
  759. }
  760. if (trans_road->size() != iter.second.size()) {
  761. GELOGW("The TransRoad of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  762. return false;
  763. }
  764. // Compare every trans node in trans road.
  765. for (size_t idx = 0; idx < trans_road->size(); idx += 1) {
  766. if (!(trans_road->at(idx).node_type == iter.second.at(idx).node_type &&
  767. trans_road->at(idx).input == iter.second.at(idx).input &&
  768. trans_road->at(idx).output == iter.second.at(idx).output)) {
  769. GELOGW("The TransRoad of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  770. return false;
  771. }
  772. }
  773. }
  774. return true;
  775. }
  776. bool ModelCacheHelper::IsVarManagerParamSameAsCache(Json &json) const {
  777. if (!json.is_object()) {
  778. GELOGW("Input param json type should be object.");
  779. return false;
  780. }
  781. try {
  782. if (json[kSessionId].get<uint64_t>() != session_id_) {
  783. GELOGW("Check VarManager cache failed.[sessionId]");
  784. return false;
  785. }
  786. if (json[kDeviceId].get<uint32_t>() != VarManager::Instance(session_id_)->DeviceId()) {
  787. GELOGW("Check VarManager cache failed.[deviceId]");
  788. return false;
  789. }
  790. if (json[kJobId].get<uint64_t>() != VarManager::Instance(session_id_)->JobId()) {
  791. GELOGW("Check VarManager cache failed.[jobId]");
  792. return false;
  793. }
  794. if (json[kGraphMemMaxSize].get<size_t>() != VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()) {
  795. GELOGW("Check VarManager cache failed.[graphMemMaxSize]");
  796. return false;
  797. }
  798. if (json[kVarMemMaxSize].get<size_t>() != VarManager::Instance(session_id_)->GetVarMemMaxSize()) {
  799. GELOGW("Check VarManager cache failed.[varMemMaxSize]");
  800. return false;
  801. }
  802. if (json[kVarMemLogicBase].get<size_t>() != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
  803. GELOGW("Check VarManager cache failed.[varMemLogicBase]");
  804. return false;
  805. }
  806. if (json[kUseMaxMemSize].get<size_t>() != VarManager::Instance(session_id_)->GetUseMaxMemorySize()) {
  807. GELOGW("Check VarManager cache failed.[useMaxMemSize]");
  808. return false;
  809. }
  810. } catch (const std::exception &e) {
  811. GELOGW("Fail to check VarManager json. Error message: %s", e.what());
  812. return false;
  813. }
  814. return true;
  815. }
  816. bool ModelCacheHelper::IsVarManagerSameAsCache(Json &json) const {
  817. if (!json.is_object()) {
  818. GELOGW("Input param json type should be object.");
  819. return false;
  820. }
  821. try {
  822. if (!IsVarManagerParamSameAsCache(json)) {
  823. GELOGW("Check VarManager cache failed.[Param]");
  824. return false;
  825. }
  826. Json mem_resource_json = move(json[kMemResourceMap]);
  827. auto ret = IsMemResourceSameAsCache(mem_resource_json);
  828. if (!ret) {
  829. GELOGW("Check VarManager cache failed.[MemResource]");
  830. return false;
  831. }
  832. Json var_resource_json = move(json[kVarResource]);
  833. ret = IsAllocatedGraphIdSameAsCache(var_resource_json[kAllocatedGraphId]);
  834. if (!ret) {
  835. GELOGW("Check VarManager cache failed.[AllocatedGraphId]");
  836. return false;
  837. }
  838. ret = IsChangedGraphIdSameAsCache(var_resource_json[kChangedGraphId]);
  839. if (!ret) {
  840. GELOGW("Check VarManager cache failed.[ChangedGraphId]");
  841. return false;
  842. }
  843. ret = IsBroadcastInfoSameAsCache(var_resource_json[kVarBroadcastInfo]);
  844. if (!ret) {
  845. GELOGW("Check VarManager cache failed.[VarBroadcastInfo]");
  846. return false;
  847. }
  848. ret = IsCurVarTensorDescSameAsCache(var_resource_json[kCurVarTensorDescMap]);
  849. if (!ret) {
  850. GELOGW("Check VarManager cache failed.[CurVarTensorDesc]");
  851. return false;
  852. }
  853. ret = IsVarAddrMgrMapSameAsCache(var_resource_json[kVarAddrMgrMap]);
  854. if (!ret) {
  855. GELOGW("Check VarManager cache failed.[VarAddrMgrMap]");
  856. return false;
  857. }
  858. ret = IsTransRoadsSameAsCache(var_resource_json[kTransRoads]);
  859. if (!ret) {
  860. GELOGW("Check VarManager cache failed.[TransRoads]");
  861. return false;
  862. }
  863. } catch (const std::exception &e) {
  864. GELOGW("Fail to check VarManager json. Error message: %s", e.what());
  865. return false;
  866. }
  867. return true;
  868. }
  869. Status ModelCacheHelper::RecoverMemResource(const Json &json) const {
  870. if (!(json.is_null() || json.is_array())) {
  871. GELOGW("Input param json type should be null or array.");
  872. return PARAM_INVALID;
  873. }
  874. std::map<rtMemType_t, int64_t> var_mem_size;
  875. auto ret = ParseMemResourceFromJson(json, var_mem_size);
  876. if (ret != SUCCESS) {
  877. GELOGW("Fail to parse MemResource from Json.");
  878. return ret;
  879. }
  880. for (const auto &iter : var_mem_size) {
  881. ret = VarManager::Instance(session_id_)->UpdateVarMemSize(iter.first, iter.second);
  882. if (ret != SUCCESS) {
  883. GELOGW("Fail to recover var mem size.");
  884. return ret;
  885. }
  886. }
  887. return SUCCESS;
  888. }
  889. Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const {
  890. if (!(json.is_null() || json.is_array())) {
  891. GELOGW("Input param json type should be null or array.");
  892. return PARAM_INVALID;
  893. }
  894. std::unordered_map<std::string, uint32_t> allocated_graph_id;
  895. auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  896. if (ret != SUCCESS) {
  897. GELOGW("Fail to parse AllocatedGraphId from Json.");
  898. return ret;
  899. }
  900. for (const auto &iter : allocated_graph_id) {
  901. ret = VarManager::Instance(session_id_)->SetAllocatedGraphId(iter.first, iter.second);
  902. if (ret != SUCCESS) {
  903. GELOGW("Fail to recover allocated graph id.");
  904. return ret;
  905. }
  906. }
  907. return SUCCESS;
  908. }
  909. Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const {
  910. if (!(json.is_null() || json.is_array())) {
  911. GELOGW("Input param json type should be null or array.");
  912. return PARAM_INVALID;
  913. }
  914. std::unordered_map<std::string, uint32_t> changed_graph_id;
  915. auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  916. if (ret != SUCCESS) {
  917. GELOGW("Fail to parse AllocatedGraphId from Json.");
  918. return ret;
  919. }
  920. for (const auto &iter : changed_graph_id) {
  921. ret = VarManager::Instance(session_id_)->SetChangedGraphId(iter.first, iter.second);
  922. if (ret != SUCCESS) {
  923. GELOGW("Fail to recover changed graph id.");
  924. return ret;
  925. }
  926. }
  927. return SUCCESS;
  928. }
  929. Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
  930. if (!(json.is_null() || json.is_array())) {
  931. GELOGW("Input param json type should be null or array.");
  932. return PARAM_INVALID;
  933. }
  934. std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  935. std::unordered_set<uint64_t> var_offset_set;
  936. auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  937. if (ret != SUCCESS) {
  938. GELOGW("Fail to parse VarAddrMgrMap from Json.");
  939. return ret;
  940. }
  941. for (const auto &iter : var_addr_mgr_vector) {
  942. const VarAddrMgr &tensor_addr_mgr = iter.second;
  943. const bool var_exist = VarManager::Instance(session_id_)->IsVarExist(iter.first, tensor_addr_mgr.tensor_desc);
  944. // SaveVarVddr if var does not exist, the logic address will be recorded by VarManager
  945. if (!var_exist) {
  946. auto logic_address = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tensor_addr_mgr.address));
  947. auto offset = (tensor_addr_mgr.offset);
  948. // Check logic address and offset
  949. if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
  950. GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
  951. offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
  952. return PARAM_INVALID;
  953. }
  954. // Offset is needed by SaveVarVddr instead of logic address
  955. ret =
  956. VarManager::Instance(session_id_)
  957. ->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc,
  958. reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(offset)), tensor_addr_mgr.memory_type);
  959. if (ret != SUCCESS) {
  960. GELOGW("Fail to recover VarAddr or TensorDesc of var[%s].", iter.first.c_str());
  961. return ret;
  962. }
  963. }
  964. // SetVarAddr to update cur_var_tensor_desc_map_
  965. ret = VarManager::Instance(session_id_)
  966. ->SetVarAddr(iter.first, tensor_addr_mgr.tensor_desc, tensor_addr_mgr.address, tensor_addr_mgr.memory_type);
  967. if (ret != SUCCESS) {
  968. GELOGW("Fail to recover VarAddr or TensorDesc desc of var[%s].", iter.first.c_str());
  969. return ret;
  970. }
  971. }
  972. return SUCCESS;
  973. }
  974. Status ModelCacheHelper::RecoverBroadcastInfo(const Json &json) const {
  975. if (!(json.is_null() || json.is_array())) {
  976. GELOGW("Input param json type should be null or array.");
  977. return PARAM_INVALID;
  978. }
  979. std::unordered_map<std::string, VarBroadCastInfo> var_broadcast_info;
  980. auto ret = ParseBroadcastInfoFromJson(json, var_broadcast_info);
  981. if (ret != SUCCESS) {
  982. GELOGW("Fail to parse BroadcastInfo from Json.");
  983. return ret;
  984. }
  985. for (const auto &iter : var_broadcast_info) {
  986. VarBroadCastInfo broadcast_info;
  987. ret = VarManager::Instance(session_id_)->SaveBroadCastInfo(graph_id_, iter.second);
  988. if (ret != SUCCESS) {
  989. GELOGW("Fail to recover broadcast info of var[%s].", iter.first.c_str());
  990. return ret;
  991. }
  992. }
  993. return SUCCESS;
  994. }
  995. Status ModelCacheHelper::RecoverTransRoads(const Json &json) const {
  996. if (!(json.is_null() || json.is_array())) {
  997. GELOGW("Input param json type should be null or array.");
  998. return PARAM_INVALID;
  999. }
  1000. std::unordered_map<std::string, std::vector<TransNodeInfo>> trans_roads;
  1001. auto ret = ParseTransRoadsFromJson(json, trans_roads);
  1002. if (ret != SUCCESS) {
  1003. GELOGW("Fail to parse TransRoads from Json.");
  1004. return ret;
  1005. }
  1006. for (const auto &iter : trans_roads) {
  1007. ret = VarManager::Instance(session_id_)->SetTransRoad(iter.first, iter.second);
  1008. if (ret != SUCCESS) {
  1009. GELOGW("Fail to find trans road of var[%s].", iter.first.c_str());
  1010. return ret;
  1011. }
  1012. }
  1013. return SUCCESS;
  1014. }
  1015. Status ModelCacheHelper::TensorDescToJson(const GeTensorDesc &ge_tensor_desc, Json &json) {
  1016. if (!(json.is_null() || json.is_object())) {
  1017. GELOGW("Input param json type should be null or object.");
  1018. return PARAM_INVALID;
  1019. }
  1020. try {
  1021. json[kDataType] = static_cast<int>(ge_tensor_desc.GetDataType());
  1022. json[kOriginDataType] = static_cast<int>(ge_tensor_desc.GetOriginDataType());
  1023. json[kLayout] = static_cast<int>(ge_tensor_desc.GetFormat());
  1024. json[kOriginLayout] = static_cast<int>(ge_tensor_desc.GetOriginFormat());
  1025. json[kShape] = ge_tensor_desc.GetShape().GetDims();
  1026. json[kOriginShape] = ge_tensor_desc.GetOriginShape().GetDims();
  1027. uint32_t real_dim_cnt = 0;
  1028. (void)TensorUtils::GetRealDimCnt(ge_tensor_desc, real_dim_cnt); // [No need to check value]
  1029. json[kRealDimCnt] = real_dim_cnt;
  1030. } catch (const std::exception &e) {
  1031. GELOGW("Fail to trans GeTensorDesc to json. Error message: %s", e.what());
  1032. return INTERNAL_ERROR;
  1033. }
  1034. return SUCCESS;
  1035. }
  1036. Status ModelCacheHelper::JsonToTensorDesc(const Json &json, ge::GeTensorDesc &ge_tensor_desc) {
  1037. if (!json.is_object()) {
  1038. GELOGW("Input param json type should be object.");
  1039. return PARAM_INVALID;
  1040. }
  1041. try {
  1042. ge_tensor_desc.SetDataType(static_cast<DataType>(json[kDataType].get<int>()));
  1043. ge_tensor_desc.SetOriginDataType(static_cast<DataType>(json[kOriginDataType].get<int>()));
  1044. ge_tensor_desc.SetFormat(static_cast<Format>(json[kLayout].get<int>()));
  1045. ge_tensor_desc.SetOriginFormat(static_cast<Format>(json[kOriginLayout].get<int>()));
  1046. GeShape shape(json[kShape].get<std::vector<int64_t>>());
  1047. ge_tensor_desc.SetShape(shape);
  1048. GeShape origin_shape(json[kOriginShape].get<std::vector<int64_t>>());
  1049. ge_tensor_desc.SetOriginShape(origin_shape);
  1050. auto real_dim_cnt = json[kRealDimCnt].get<uint32_t>();
  1051. (void)TensorUtils::SetRealDimCnt(ge_tensor_desc, real_dim_cnt); // [No need to check value]
  1052. } catch (const std::exception &e) {
  1053. GELOGW("Fail to trans Json to GeTensorDesc. Error message: %s", e.what());
  1054. return INTERNAL_ERROR;
  1055. }
  1056. return SUCCESS;
  1057. }
  1058. Status ModelCacheHelper::GetNodesHashMapJson(Json &json) const {
  1059. if (!(json.is_null() || json.is_array())) {
  1060. GELOGW("Input param json type should be null or array.");
  1061. return PARAM_INVALID;
  1062. }
  1063. map<std::string, size_t> hash_map;
  1064. GetNodesHash(hash_map);
  1065. for (const auto &iter : hash_map) {
  1066. Json node_hash_json;
  1067. try {
  1068. node_hash_json[kName] = iter.first;
  1069. node_hash_json[kHash] = iter.second;
  1070. json.emplace_back(move(node_hash_json));
  1071. } catch (const std::exception &e) {
  1072. GELOGW("Fail to trans node cache to json. Error message: %s", e.what());
  1073. return INTERNAL_ERROR;
  1074. }
  1075. }
  1076. return SUCCESS;
  1077. }
  1078. Status ModelCacheHelper::GetMemResourceMap(Json &json) const {
  1079. if (!(json.is_null() || json.is_array())) {
  1080. GELOGW("Input param json type should be null or array.");
  1081. return PARAM_INVALID;
  1082. }
  1083. const auto total_size = VarManager::Instance(session_id_)->GetVarMemMaxSize();
  1084. const auto var_mem_size = VarManager::Instance(session_id_)->GetVarMemSize(RT_MEMORY_HBM);
  1085. Json mem_resource_json;
  1086. try {
  1087. mem_resource_json[kMemType] = RT_MEMORY_HBM;
  1088. mem_resource_json[kTotalSize] = total_size;
  1089. mem_resource_json[kVarMemSize] = var_mem_size;
  1090. json.emplace_back(move(mem_resource_json));
  1091. } catch (const std::exception &e) {
  1092. GELOGW("Fail to trans MemResourceMap to json. Error message: %s", e.what());
  1093. return INTERNAL_ERROR;
  1094. }
  1095. return SUCCESS;
  1096. }
  1097. Status ModelCacheHelper::GetVarAddrMgrMapJson(Json &json) const {
  1098. if (!(json.is_null() || json.is_array())) {
  1099. GELOGW("Input param json type should be null or array.");
  1100. return PARAM_INVALID;
  1101. }
  1102. std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map;
  1103. VarManager::Instance(session_id_)->GetAllVarAddrMgr(var_addr_mgr_map);
  1104. try {
  1105. for (const auto &iter : var_addr_mgr_map) {
  1106. Json var_addr_json;
  1107. string name;
  1108. GetVarNameFromVarKey(iter.first, iter.second.tensor_desc, name);
  1109. var_addr_json[kName] = name;
  1110. var_addr_json[kAddress] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(iter.second.address));
  1111. var_addr_json[kMemoryType] = iter.second.memory_type;
  1112. var_addr_json[kOffset] = iter.second.offset;
  1113. // Copy tensor desc to json.
  1114. Json tensor_desc_json;
  1115. auto ret = TensorDescToJson(iter.second.tensor_desc, tensor_desc_json);
  1116. if (ret != SUCCESS) {
  1117. GELOGW("Fail to trans tensor desc to json.");
  1118. return INTERNAL_ERROR;
  1119. }
  1120. var_addr_json[kTensorDesc] = move(tensor_desc_json);
  1121. json.emplace_back(move(var_addr_json));
  1122. }
  1123. } catch (const std::exception &e) {
  1124. GELOGW("Fail to trans VarAddrMgrMap to json. Error message: %s", e.what());
  1125. return INTERNAL_ERROR;
  1126. }
  1127. return SUCCESS;
  1128. }
  1129. Status ModelCacheHelper::GetCurVarTensorDescMapJson(Json &json) const {
  1130. if (!(json.is_null() || json.is_array())) {
  1131. GELOGW("Input param json type should be null or array.");
  1132. return PARAM_INVALID;
  1133. }
  1134. try {
  1135. for (const auto &name : var_names_) {
  1136. Json cur_tensor_desc_json;
  1137. GeTensorDesc tensor_desc;
  1138. auto ret = VarManager::Instance(session_id_)->GetCurVarDesc(name, tensor_desc);
  1139. if (ret != SUCCESS) {
  1140. GELOGI("Get variable[%s] current tensor desc failed. It will be skipped.", name.c_str());
  1141. continue;
  1142. }
  1143. cur_tensor_desc_json[kName] = name;
  1144. Json tensor_desc_json;
  1145. ret = TensorDescToJson(tensor_desc, tensor_desc_json);
  1146. if (ret != SUCCESS) {
  1147. GELOGW("Fail to trans tensor desc to json.");
  1148. return INTERNAL_ERROR;
  1149. }
  1150. cur_tensor_desc_json[kTensorDesc] = move(tensor_desc_json);
  1151. json.emplace_back(move(cur_tensor_desc_json));
  1152. }
  1153. } catch (const std::exception &e) {
  1154. GELOGW("Fail to trans CurVarTensorDescMap to json. Error message: %s", e.what());
  1155. return INTERNAL_ERROR;
  1156. }
  1157. return SUCCESS;
  1158. }
  1159. Status ModelCacheHelper::GetTransRoadsJson(Json &json) const {
  1160. if (!(json.is_null() || json.is_array())) {
  1161. GELOGW("Input param json type should be null or array.");
  1162. return PARAM_INVALID;
  1163. }
  1164. try {
  1165. for (const auto &name : var_names_) {
  1166. auto trans_road = VarManager::Instance(session_id_)->GetTransRoad(name);
  1167. if (trans_road == nullptr) {
  1168. continue;
  1169. }
  1170. // Json object, variable name and trans road
  1171. Json trans_road_map_json;
  1172. trans_road_map_json[kName] = name;
  1173. Json trans_road_json;
  1174. Status ret;
  1175. // Add nodes' info to json
  1176. for (const auto &trans_node_info : *trans_road) {
  1177. Json trans_node_info_json;
  1178. trans_node_info_json[kNodeType] = trans_node_info.node_type;
  1179. Json input_tensor_desc_json;
  1180. ret = TensorDescToJson(trans_node_info.input, input_tensor_desc_json);
  1181. if (ret != SUCCESS) {
  1182. GELOGW("Fail to trans tensor desc to json.");
  1183. return INTERNAL_ERROR;
  1184. }
  1185. trans_node_info_json[kInputTensorDesc] = move(input_tensor_desc_json);
  1186. Json output_tensor_desc_json;
  1187. ret = TensorDescToJson(trans_node_info.output, output_tensor_desc_json);
  1188. if (ret != SUCCESS) {
  1189. GELOGW("Fail to trans tensor desc to json.");
  1190. return INTERNAL_ERROR;
  1191. }
  1192. trans_node_info_json[kOutputTensorDesc] = move(output_tensor_desc_json);
  1193. trans_road_json.emplace_back(move(trans_node_info_json));
  1194. }
  1195. trans_road_map_json[kTransRoad] = move(trans_road_json);
  1196. json.emplace_back(move(trans_road_map_json));
  1197. }
  1198. } catch (const std::exception &e) {
  1199. GELOGW("Fail to trans VarToTransRoad to json. Error message: %s", e.what());
  1200. return INTERNAL_ERROR;
  1201. }
  1202. return SUCCESS;
  1203. }
  1204. Status ModelCacheHelper::GetChangedGraphIdJson(Json &json) const {
  1205. if (!(json.is_null() || json.is_array())) {
  1206. GELOGW("Input param json type should be null or array.");
  1207. return PARAM_INVALID;
  1208. }
  1209. for (const auto &name : var_names_) {
  1210. uint32_t changed_graph_id = 0;
  1211. Status ret = VarManager::Instance(session_id_)->GetChangedGraphId(name, changed_graph_id);
  1212. if (ret != SUCCESS) {
  1213. continue;
  1214. }
  1215. Json name_and_changed_graph_id;
  1216. try {
  1217. name_and_changed_graph_id[kName] = name;
  1218. name_and_changed_graph_id[kGraphId] = changed_graph_id;
  1219. json.emplace_back(move(name_and_changed_graph_id));
  1220. } catch (const std::exception &e) {
  1221. GELOGW("Fail to trans ChangedGraphId to json. Error message: %s", e.what());
  1222. return INTERNAL_ERROR;
  1223. }
  1224. }
  1225. return SUCCESS;
  1226. }
  1227. Status ModelCacheHelper::GetAllocatedGraphIdJson(Json &json) const {
  1228. if (!(json.is_null() || json.is_array())) {
  1229. GELOGW("Input param json type should be null or array.");
  1230. return PARAM_INVALID;
  1231. }
  1232. for (const auto &name : var_names_) {
  1233. uint32_t allocated_graph_id = 0;
  1234. Status ret = VarManager::Instance(session_id_)->GetAllocatedGraphId(name, allocated_graph_id);
  1235. if (ret != SUCCESS) {
  1236. continue;
  1237. }
  1238. Json name_and_allocated_graph_id;
  1239. try {
  1240. name_and_allocated_graph_id[kName] = name;
  1241. name_and_allocated_graph_id[kGraphId] = allocated_graph_id;
  1242. json.emplace_back(move(name_and_allocated_graph_id));
  1243. } catch (const std::exception &e) {
  1244. GELOGW("Fail to trans AllocatedGraphId to json. Error message: %s", e.what());
  1245. return INTERNAL_ERROR;
  1246. }
  1247. }
  1248. return SUCCESS;
  1249. }
  1250. Status ModelCacheHelper::GetBroadcastInfoJson(Json &json) const {
  1251. if (!(json.is_null() || json.is_array())) {
  1252. GELOGW("Input param json type should be null or array.");
  1253. return PARAM_INVALID;
  1254. }
  1255. for (const auto &name : var_names_) {
  1256. VarBroadCastInfo var_broadcast_info;
  1257. Status ret = VarManager::Instance(session_id_)->GetBroadCastInfo(graph_id_, name, var_broadcast_info);
  1258. if (ret != SUCCESS) {
  1259. continue;
  1260. }
  1261. Json var_broadcast_info_json;
  1262. try {
  1263. var_broadcast_info_json[kName] = name;
  1264. var_broadcast_info_json[kBroadcastName] = var_broadcast_info.broadcast_name;
  1265. var_broadcast_info_json[kIdx] = var_broadcast_info.idx;
  1266. var_broadcast_info_json[kInputOffset] = var_broadcast_info.input_offset;
  1267. var_broadcast_info_json[kInputSize] = var_broadcast_info.input_size;
  1268. var_broadcast_info_json[kOutputOffset] = var_broadcast_info.output_offset;
  1269. var_broadcast_info_json[kOutputSize] = var_broadcast_info.output_size;
  1270. json.emplace_back(move(var_broadcast_info_json));
  1271. } catch (const std::exception &e) {
  1272. GELOGW("Fail to trans VarBroadcastInfo to json. Error message: %s", e.what());
  1273. return INTERNAL_ERROR;
  1274. }
  1275. }
  1276. return SUCCESS;
  1277. }
  1278. Status ModelCacheHelper::GetVarResourceJson(Json &json) const {
  1279. if (!(json.is_null() || json.is_object())) {
  1280. GELOGW("Input param json type should be null or object.");
  1281. return PARAM_INVALID;
  1282. }
  1283. Json var_addr_mgr_map_json;
  1284. Status ret = GetVarAddrMgrMapJson(var_addr_mgr_map_json);
  1285. if (ret != SUCCESS) {
  1286. GELOGW("GetVarAddrMgrMapJson failed.");
  1287. return INTERNAL_ERROR;
  1288. }
  1289. Json cur_var_tensor_desc_map_json;
  1290. ret = GetCurVarTensorDescMapJson(cur_var_tensor_desc_map_json);
  1291. if (ret != SUCCESS) {
  1292. GELOGW("GetCurVarTensorDescMapJson failed.");
  1293. return INTERNAL_ERROR;
  1294. }
  1295. Json trans_roads_json;
  1296. ret = GetTransRoadsJson(trans_roads_json);
  1297. if (ret != SUCCESS) {
  1298. GELOGW("GetTransRoadsJson failed.");
  1299. return INTERNAL_ERROR;
  1300. }
  1301. Json changed_graph_id_json;
  1302. ret = GetChangedGraphIdJson(changed_graph_id_json);
  1303. if (ret != SUCCESS) {
  1304. GELOGW("GetChangedGraphIdJson failed.");
  1305. return INTERNAL_ERROR;
  1306. }
  1307. Json allocated_graph_id_json;
  1308. ret = GetAllocatedGraphIdJson(allocated_graph_id_json);
  1309. if (ret != SUCCESS) {
  1310. GELOGW("GetAllocatedGraphIdJson failed.");
  1311. return INTERNAL_ERROR;
  1312. }
  1313. Json var_broadcast_info_json;
  1314. ret = GetBroadcastInfoJson(var_broadcast_info_json);
  1315. if (ret != SUCCESS) {
  1316. GELOGW("GetBroadcastInfoJson failed.");
  1317. return INTERNAL_ERROR;
  1318. }
  1319. try {
  1320. json[kVarAddrMgrMap] = move(var_addr_mgr_map_json);
  1321. json[kCurVarTensorDescMap] = move(cur_var_tensor_desc_map_json);
  1322. json[kTransRoads] = move(trans_roads_json);
  1323. json[kChangedGraphId] = move(changed_graph_id_json);
  1324. json[kAllocatedGraphId] = move(allocated_graph_id_json);
  1325. json[kVarBroadcastInfo] = move(var_broadcast_info_json);
  1326. } catch (const exception &e) {
  1327. GELOGW("Fail to generate VarResource json. Error message: %s", e.what());
  1328. return INTERNAL_ERROR;
  1329. }
  1330. return SUCCESS;
  1331. }
  1332. Status ModelCacheHelper::GetVarManagerJson(Json &json) const {
  1333. if (!(json.is_null() || json.is_object())) {
  1334. GELOGW("Input param json type should be null or object.");
  1335. return PARAM_INVALID;
  1336. }
  1337. Json mem_resource_map_json;
  1338. auto ret = GetMemResourceMap(mem_resource_map_json);
  1339. if (ret != SUCCESS) {
  1340. GELOGW("GetMemResourceMap failed.");
  1341. return INTERNAL_ERROR;
  1342. }
  1343. Json var_resource_json;
  1344. ret = GetVarResourceJson(var_resource_json);
  1345. if (ret != SUCCESS) {
  1346. GELOGW("GetVarResourceJson failed.");
  1347. return INTERNAL_ERROR;
  1348. }
  1349. try {
  1350. json[kSessionId] = session_id_;
  1351. json[kDeviceId] = VarManager::Instance(session_id_)->DeviceId();
  1352. json[kJobId] = VarManager::Instance(session_id_)->JobId();
  1353. json[kGraphMemMaxSize] = VarManager::Instance(session_id_)->GetGraphMemoryMaxSize();
  1354. json[kVarMemMaxSize] = VarManager::Instance(session_id_)->GetVarMemMaxSize();
  1355. json[kVarMemLogicBase] = VarManager::Instance(session_id_)->GetVarMemLogicBase();
  1356. json[kUseMaxMemSize] = VarManager::Instance(session_id_)->GetUseMaxMemorySize();
  1357. json[kMemResourceMap] = move(mem_resource_map_json);
  1358. json[kVarResource] = move(var_resource_json);
  1359. } catch (const exception &e) {
  1360. GELOGW("Fail to generate VarManager json. Error message: %s", e.what());
  1361. return INTERNAL_ERROR;
  1362. }
  1363. return SUCCESS;
  1364. }
  1365. Status ModelCacheHelper::SaveVarManagerToCache(bool before_build) const {
  1366. if (!is_cache_path_valid_for_output) {
  1367. GELOGW("Invalid cache path.");
  1368. return FAILED;
  1369. }
  1370. Json var_manager_json;
  1371. auto ret = GetVarManagerJson(var_manager_json);
  1372. if (ret != SUCCESS) {
  1373. GELOGW("Fail to generate VarManager json.");
  1374. return FAILED;
  1375. }
  1376. string var_manager_path = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) +
  1377. (before_build ? kBeforeVarManagerSuffix : kAfterVarManagerSuffix);
  1378. ret = SaveJsonToFile(var_manager_path, var_manager_json);
  1379. if (ret != SUCCESS) {
  1380. GELOGW("Fail to save VarManager info to json file, path: %s.", cache_path_.c_str());
  1381. return ret;
  1382. }
  1383. return SUCCESS;
  1384. }
  1385. Status ModelCacheHelper::SaveOmModelToCache(const GeModelPtr &ge_model) const {
  1386. if (!is_cache_path_valid_for_output) {
  1387. GELOGW("Invalid cache path.");
  1388. return FAILED;
  1389. }
  1390. string om_path = RealPath(cache_path_.c_str());
  1391. if (om_path.empty()) {
  1392. GELOGW("file path is invalid. please check path om: %s", cache_path_.c_str());
  1393. return FAILED;
  1394. }
  1395. string cache_om_path = cache_path_;
  1396. cache_om_path += (to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kOmSuffix);
  1397. GELOGI("SaveOmModelToCache: start to save om model : %s", cache_om_path.c_str());
  1398. ModelHelper model_helper;
  1399. SaveParam save_param;
  1400. ModelBufferData model;
  1401. Status ret = model_helper.SaveToOmModel(ge_model, save_param, cache_om_path, model);
  1402. if (ret != SUCCESS) {
  1403. GELOGW("SaveOmModelToCache: save mode failed. ret = %u", ret);
  1404. return ret;
  1405. }
  1406. return SUCCESS;
  1407. }
  1408. Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource) {
  1409. if (!(json.is_array() || json.is_null())) {
  1410. GELOGW("Input param json type should be null or array.");
  1411. return PARAM_INVALID;
  1412. }
  1413. mem_resource.clear();
  1414. for (const Json &mem_resource_json : json) {
  1415. MemResource var_addr_mgr;
  1416. try {
  1417. rtMemType_t mem_type = mem_resource_json[kMemType].get<rtMemType_t>();
  1418. uint64_t var_mem_size = mem_resource_json[kVarMemSize].get<int64_t>();
  1419. mem_resource[mem_type] = var_mem_size;
  1420. } catch (const exception &e) {
  1421. GELOGW("Fail to trans Json to MemResource. Error message: %s", e.what());
  1422. return INTERNAL_ERROR;
  1423. }
  1424. }
  1425. return SUCCESS;
  1426. }
  1427. Status ModelCacheHelper::ParseVarAddrMgrMapFromJson(
  1428. const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
  1429. std::unordered_set<uint64_t> &var_offset_set) {
  1430. if (!(json.is_array() || json.is_null())) {
  1431. GELOGW("Input param json type should be null or array.");
  1432. return PARAM_INVALID;
  1433. }
  1434. var_addr_mgr_vector.clear();
  1435. var_offset_set.clear();
  1436. for (const Json &var_addr_json : json) {
  1437. VarAddrMgr var_addr_mgr;
  1438. try {
  1439. auto logic_address = var_addr_json[kAddress].get<uint64_t>();
  1440. auto address = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_address));
  1441. var_addr_mgr.address = address;
  1442. var_addr_mgr.offset = var_addr_json[kOffset].get<uint64_t>();
  1443. var_addr_mgr.memory_type = var_addr_json[kMemoryType].get<rtMemType_t>();
  1444. auto ret = JsonToTensorDesc(var_addr_json[kTensorDesc], var_addr_mgr.tensor_desc);
  1445. if (ret != SUCCESS) {
  1446. GELOGW("Fail to trans json to tensor desc.");
  1447. return ret;
  1448. }
  1449. var_addr_mgr_vector.emplace_back(var_addr_json[kName].get<string>(), move(var_addr_mgr));
  1450. var_offset_set.insert(logic_address);
  1451. } catch (const exception &e) {
  1452. GELOGW("Fail to trans Json to VarAddrMgr. Error message: %s", e.what());
  1453. return INTERNAL_ERROR;
  1454. }
  1455. }
  1456. return SUCCESS;
  1457. }
  1458. Status ModelCacheHelper::ParseCurVarTensorDescMapFromJson(
  1459. const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map) {
  1460. if (!(json.is_array() || json.is_null())) {
  1461. GELOGW("Input param json type should be null or array.");
  1462. return PARAM_INVALID;
  1463. }
  1464. cur_var_tensor_desc_map.clear();
  1465. for (const Json &tensor_desc_json : json) {
  1466. GeTensorDesc tensor_desc;
  1467. try {
  1468. auto ret = JsonToTensorDesc(tensor_desc_json[kTensorDesc], tensor_desc);
  1469. if (ret != SUCCESS) {
  1470. GELOGW("Fail to trans json to tensor desc.");
  1471. return ret;
  1472. }
  1473. cur_var_tensor_desc_map[tensor_desc_json[kName].get<string>()] = move(tensor_desc);
  1474. } catch (const exception &e) {
  1475. GELOGW("Fail to trans Json to VarAddrMgr. Error message: %s", e.what());
  1476. return INTERNAL_ERROR;
  1477. }
  1478. }
  1479. return SUCCESS;
  1480. }
  1481. Status ModelCacheHelper::ParseTransRoadsFromJson(
  1482. const Json &json, std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads) {
  1483. if (!(json.is_array() || json.is_null())) {
  1484. GELOGW("Input param json type should be null or array.");
  1485. return PARAM_INVALID;
  1486. }
  1487. trans_roads.clear();
  1488. try {
  1489. for (const Json &name_trans_road_json : json) {
  1490. const Json &trans_road_json = name_trans_road_json[kTransRoad];
  1491. if (!(trans_road_json.is_array() || trans_road_json.is_null())) {
  1492. GELOGW("%s json type should be null or object.", kTransRoad);
  1493. return PARAM_INVALID;
  1494. }
  1495. vector<TransNodeInfo> trans_road;
  1496. for (const Json &trans_node_json : trans_road_json) {
  1497. TransNodeInfo trans_node_info;
  1498. trans_node_info.node_type = trans_node_json[kNodeType];
  1499. GeTensorDesc input_tensor_desc;
  1500. auto ret = JsonToTensorDesc(trans_node_json[kInputTensorDesc], input_tensor_desc);
  1501. if (ret != SUCCESS) {
  1502. GELOGW("Fail to trans json to tensor desc.");
  1503. return ret;
  1504. }
  1505. trans_node_info.input = move(input_tensor_desc);
  1506. GeTensorDesc output_tensor_desc;
  1507. ret = JsonToTensorDesc(trans_node_json[kOutputTensorDesc], output_tensor_desc);
  1508. if (ret != SUCCESS) {
  1509. GELOGW("Fail to trans json to tensor desc.");
  1510. return ret;
  1511. }
  1512. trans_node_info.output = move(output_tensor_desc);
  1513. trans_road.emplace_back(move(trans_node_info));
  1514. }
  1515. trans_roads[name_trans_road_json[kName].get<string>()] = move(trans_road);
  1516. }
  1517. } catch (const exception &e) {
  1518. GELOGW("Fail to trans Json to TransRoads. Error message: %s", e.what());
  1519. return INTERNAL_ERROR;
  1520. }
  1521. return SUCCESS;
  1522. }
  1523. Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
  1524. std::unordered_map<std::string, uint32_t> &changed_graph_id) {
  1525. if (!(json.is_array() || json.is_null())) {
  1526. GELOGW("Input param json type should be null or array.");
  1527. return PARAM_INVALID;
  1528. }
  1529. changed_graph_id.clear();
  1530. for (const Json &name_graph_id_json : json) {
  1531. try {
  1532. changed_graph_id[name_graph_id_json[kName].get<string>()] = name_graph_id_json[kGraphId].get<uint32_t>();
  1533. } catch (const exception &e) {
  1534. GELOGW("Fail to trans Json to changed graph id. Error message: %s", e.what());
  1535. return INTERNAL_ERROR;
  1536. }
  1537. }
  1538. return SUCCESS;
  1539. }
  1540. Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json,
  1541. std::unordered_map<std::string, uint32_t> &allocated_graph_id) {
  1542. if (!(json.is_array() || json.is_null())) {
  1543. GELOGW("Input param json type should be null or array.");
  1544. return PARAM_INVALID;
  1545. }
  1546. allocated_graph_id.clear();
  1547. for (const Json &name_graph_id_json : json) {
  1548. try {
  1549. allocated_graph_id[name_graph_id_json[kName].get<string>()] = name_graph_id_json[kGraphId].get<uint32_t>();
  1550. } catch (const exception &e) {
  1551. GELOGW("Fail to trans Json to allocated graph id. Error message: %s", e.what());
  1552. return INTERNAL_ERROR;
  1553. }
  1554. }
  1555. return SUCCESS;
  1556. }
  1557. Status ModelCacheHelper::ParseBroadcastInfoFromJson(
  1558. const Json &json, std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info) {
  1559. if (!(json.is_array() || json.is_null())) {
  1560. GELOGW("Input param json type should be null or array.");
  1561. return PARAM_INVALID;
  1562. }
  1563. for (const Json &broadcast_info_json : json) {
  1564. VarBroadCastInfo broadcast_info;
  1565. try {
  1566. broadcast_info.var_name = broadcast_info_json[kName].get<string>();
  1567. broadcast_info.broadcast_name = broadcast_info_json[kBroadcastName].get<string>();
  1568. broadcast_info.idx = broadcast_info_json[kIdx].get<int>();
  1569. broadcast_info.input_offset = broadcast_info_json[kInputOffset].get<int64_t>();
  1570. broadcast_info.input_size = broadcast_info_json[kInputSize].get<uint64_t>();
  1571. broadcast_info.output_offset = broadcast_info_json[kOutputOffset].get<int64_t>();
  1572. broadcast_info.output_size = broadcast_info_json[kOutputSize].get<uint64_t>();
  1573. } catch (const exception &e) {
  1574. GELOGW("Fail to trans Json to VarBroadCastInfo. Error message: %s", e.what());
  1575. return INTERNAL_ERROR;
  1576. }
  1577. var_broadcast_info[broadcast_info.var_name] = broadcast_info;
  1578. }
  1579. return SUCCESS;
  1580. }
  1581. Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
  1582. string cache_om = cache_path_ + to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kOmSuffix;
  1583. if (!CheckInputPathValid(cache_om)) {
  1584. GELOGW("Invalid cache path for input:%s.", cache_om.c_str());
  1585. return FAILED;
  1586. }
  1587. string om_path = RealPath(cache_om.c_str());
  1588. if (om_path.empty()) {
  1589. GELOGW("file path is invalid. please check file om: %s", om_path.c_str());
  1590. return FAILED;
  1591. }
  1592. GELOGI("load model data from file: %s", om_path.c_str());
  1593. Status ret;
  1594. string key_path;
  1595. int32_t priority = 0;
  1596. ModelData model_data;
  1597. ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
  1598. if (ret != SUCCESS) {
  1599. GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
  1600. return ret;
  1601. }
  1602. ModelHelper model_helper;
  1603. ret = model_helper.LoadModel(model_data);
  1604. if (ret != SUCCESS) {
  1605. GELOGW("LoadOmModelFromCache: Load model from data failed. ret = %u", ret);
  1606. return ret;
  1607. }
  1608. ge_model = model_helper.GetGeModel();
  1609. ret = RecompileNodes(ge_model);
  1610. if (ret != SUCCESS) {
  1611. GELOGW("LoadOmModelFromCache: recompile nodes failed. ret = %u", ret);
  1612. return ret;
  1613. }
  1614. return SUCCESS;
  1615. }
  1616. Status ModelCacheHelper::GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc,
  1617. string &var_name) {
  1618. std::string::size_type underline_idx = var_key.rfind('_');
  1619. if (underline_idx == std::string::npos) {
  1620. GELOGW("Invalid var key: underline not found");
  1621. return FAILED;
  1622. }
  1623. std::string::size_type format_idx =
  1624. var_key.rfind(std::to_string(static_cast<int32_t>(tensor_desc.GetFormat())), underline_idx);
  1625. if (format_idx == std::string::npos) {
  1626. GELOGW("Invalid var key: format not found");
  1627. return FAILED;
  1628. }
  1629. var_name = var_key.substr(0, format_idx);
  1630. return SUCCESS;
  1631. }
  1632. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示