You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

model_cache_helper.cc 66 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <fcntl.h>
  17. #include <unistd.h>
  18. #include <climits>
  19. #include <cstdio>
  20. #include <fstream>
  21. #include <functional>
  22. #include "common/ge/ge_util.h"
  23. #include "common/helper/model_cache_helper.h"
  24. #include "common/types.h"
  25. #include "framework/common/debug/ge_log.h"
  26. #include "framework/common/ge_types.h"
  27. #include "framework/common/helper/model_helper.h"
  28. #include "framework/common/util.h"
  29. #include "graph/detail/attributes_holder.h"
  30. #include "graph/detail/model_serialize_imp.h"
  31. #include "graph/load/new_model_manager/davinci_model_parser.h"
  32. #include "graph/model.h"
  33. #include "graph/utils/graph_utils.h"
  34. #include "graph/utils/tensor_utils.h"
  35. #include "init/gelib.h"
  36. #include "proto/ge_ir.pb.h"
  37. using namespace std;
  // File-local string constants used by ModelCacheHelper.
  38. namespace {
  // Name handed to GetOpsKernelInfoStore() when RecompileNodes looks up the kernel info store.
  39. const char *const kTbeKernelInfoStoreName = "AIcoreEngine";
  // Placeholder name set on the compute graph while GetComputeGraphHash serializes it.
  40. const char *const kGraphName = "temp_name";
  41. // Keys of json
  // kNodeNum, kEdgeNum, kGraphHash and kNodeHash are the top-level keys of the manifest json;
  // kName and kHash are read from each element of the kNodeHash array.
  42. const char *const kNodeNum = "nodeNum";
  43. const char *const kEdgeNum = "edgeNum";
  44. const char *const kGraphHash = "graphHash";
  45. const char *const kNodeHash = "nodeHash";
  46. const char *const kHash = "hash";
  47. const char *const kSessionId = "sessionId";
  48. const char *const kDeviceId = "deviceId";
  49. const char *const kJobId = "jobId";
  50. const char *const kGraphMemMaxSize = "graphMemMaxSize";
  51. const char *const kVarMemMaxSize = "varMemMaxSize";
  52. const char *const kVarMemLogicBase = "varMemLogicBase";
  53. const char *const kUseMaxMemSize = "useMaxMemSize";
  // kMemResourceMap and kVarResource are the two sections RecoverVarManagerFromCache reads from the
  // var manager json.
  54. const char *const kMemResourceMap = "memResourceMap";
  55. const char *const kMemType = "memType";
  56. const char *const kTotalSize = "totalSize";
  57. const char *const kVarMemSize = "varMemSize";
  58. const char *const kVarResource = "varResource";
  59. const char *const kVarAddrMgrMap = "varAddrMgrMap";
  60. const char *const kName = "name";
  61. const char *const kAddress = "address";
  62. const char *const kOffset = "offset";
  63. const char *const kMemoryType = "memoryType";
  64. const char *const kTensorDesc = "tensorDesc";
  65. const char *const kDataType = "dataType";
  66. const char *const kShape = "shape";
  67. const char *const kLayout = "layout";
  68. const char *const kOriginDataType = "originDataType";
  69. const char *const kOriginShape = "originShape";
  70. const char *const kOriginLayout = "originLayout";
  71. const char *const kRealDimCnt = "realDimCnt";
  72. const char *const kCurVarTensorDescMap = "curVarTensorDescMap";
  73. const char *const kTransRoads = "transRoads";
  74. const char *const kTransRoad = "transRoad";
  75. const char *const kNodeType = "nodeType";
  76. const char *const kInputTensorDesc = "inputTensorDesc";
  77. const char *const kOutputTensorDesc = "outputTensorDesc";
  78. const char *const kChangedGraphId = "changedGraphId";
  79. const char *const kAllocatedGraphId = "allocatedGraphId";
  80. const char *const kGraphId = "graphId";
  81. const char *const kVarBroadcastInfo = "varBroadcastInfo";
  82. const char *const kBroadcastName = "broadcastName";
  83. const char *const kIdx = "idx";
  84. const char *const kInputOffset = "inputOffset";
  85. const char *const kInputSize = "inputSize";
  86. const char *const kOutputOffset = "outputOffset";
  87. const char *const kOutputSize = "outputSize";
  88. // Suffix of cache files
  // The manifest and var manager suffixes are appended to "<graph_id>_<run_times>" when the
  // helper builds a cache file name.
  89. const char *const kBeforeVarManagerSuffix = "_before_build_var_manager.json";
  90. const char *const kAfterVarManagerSuffix = "_after_build_var_manager.json";
  91. const char *const kManifestSuffix = ".manifest";
  92. const char *const kOmSuffix = ".om";
  93. } // namespace
  94. namespace ge {
  // Definition of the static counter map: graph id -> number of ModelCacheHelper objects constructed
  // for that id. The constructor increments it, and the count is embedded in cache file names.
  95. map<uint32_t, uint32_t> ModelCacheHelper::graph_id_run_times_;
  96. ModelCacheHelper::ModelCacheHelper(uint64_t session_id, uint32_t graph_id, ComputeGraphPtr &compute_graph)
  97. : session_id_(session_id),
  98. graph_id_(graph_id),
  99. compute_graph_(compute_graph),
  100. is_cache_path_valid_for_output(false) {
  101. if (graph_id_run_times_.count(graph_id) == 0) {
  102. graph_id_run_times_[graph_id] = 1;
  103. } else {
  104. graph_id_run_times_[graph_id] = graph_id_run_times_[graph_id] + 1;
  105. }
  106. for (const auto &node : compute_graph_->GetDirectNode()) {
  107. bool is_variable = (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) ||
  108. (node->GetType() == VARHANDLEOP) || (node->GetType() == CONSTANTOP);
  109. if (!is_variable) {
  110. continue;
  111. }
  112. var_names_.insert(node->GetName());
  113. }
  114. std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
  115. if (instance_ptr != nullptr && instance_ptr->IsIncreBuild()) {
  116. std::string cache_path = instance_ptr->GetIncreBuildCachePath();
  117. GELOGD("Incre build path conf: %s", cache_path.c_str());
  118. string fake_file_path = cache_path + to_string(graph_id_) + kManifestSuffix;
  119. if (CheckOutputPathValid(fake_file_path)) {
  120. is_cache_path_valid_for_output = true;
  121. } else {
  122. GELOGW("Invalid cache path for output.");
  123. }
  124. std::string real_cache_path = RealPath(cache_path.c_str());
  125. if (real_cache_path.empty()) {
  126. GELOGW("Invalid incre build cache path conf: %s", cache_path.c_str());
  127. return;
  128. }
  129. cache_path_ = real_cache_path + '/';
  130. GELOGD("Try to use incre build cache path: %s", cache_path_.c_str());
  131. }
  132. }
  133. ModelCacheHelper::~ModelCacheHelper() { var_names_.clear(); }
  134. bool ModelCacheHelper::IsModelCacheHit() const {
  135. CacheInfo cache_info;
  136. if (GetCacheInfo(cache_info) != SUCCESS) {
  137. GELOGI("Get cache info of graph id[%u] failed.", graph_id_);
  138. return false;
  139. }
  140. // Check number of nodes and edges first.
  141. if (cache_info.node_num != compute_graph_->GetDirectNodesSize()) {
  142. GELOGI("Graph id[%u] cache miss: the node number of the graph does not match the cache info.", graph_id_);
  143. return false;
  144. }
  145. size_t edge_num = 0;
  146. for (const auto &node : compute_graph_->GetDirectNode()) {
  147. for (const auto &anchor : node->GetAllInAnchors()) {
  148. edge_num += anchor->GetPeerAnchors().size();
  149. }
  150. }
  151. if (cache_info.edge_num != edge_num) {
  152. GELOGI("Graph id[%u] cache miss: the edge number of the graph does not match the cache info.", graph_id_);
  153. return false;
  154. }
  155. size_t compute_graph_hash;
  156. auto ret = GetComputeGraphHash(compute_graph_hash);
  157. if (ret != SUCCESS || cache_info.graph_hash != compute_graph_hash) {
  158. GELOGI("Graph id[%u] cache miss: the hash code of the graph does not match the cache info.", graph_id_);
  159. return false;
  160. }
  161. if (!IsNodeHashSameAsCache(cache_info.nodes_hash)) {
  162. GELOGI("Graph id[%u] cache miss: the hash code of node does not match the cache info.", graph_id_);
  163. return false;
  164. }
  165. string var_manager_cache =
  166. to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kBeforeVarManagerSuffix;
  167. Json var_manager_json;
  168. if (LoadJsonFromFile(var_manager_cache, var_manager_json) != SUCCESS) {
  169. GELOGW("Fail to load json from cache file: %s", var_manager_cache.c_str());
  170. return false;
  171. }
  172. if (!IsVarManagerSameAsCache(var_manager_json)) {
  173. GELOGI("Graph id[%u] cache miss: the VarManager does not match the cache info.", graph_id_);
  174. return false;
  175. }
  176. GELOGI("Graph id[%u] cache hit.", graph_id_);
  177. return true;
  178. }
  179. Status ModelCacheHelper::RefreshComputeGraph(const ComputeGraphPtr &compute_graph) {
  180. if (compute_graph->IsValid()) {
  181. compute_graph_ = compute_graph;
  182. var_names_.clear();
  183. for (const auto &node : compute_graph_->GetDirectNode()) {
  184. bool is_variable = (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) ||
  185. (node->GetType() == VARHANDLEOP) || (node->GetType() == CONSTANTOP);
  186. if (!is_variable) {
  187. continue;
  188. }
  189. var_names_.insert(node->GetName());
  190. }
  191. return SUCCESS;
  192. } else {
  193. GELOGW("Invalid compute graph.");
  194. return FAILED;
  195. }
  196. }
  197. Status ModelCacheHelper::ClearCache(uint32_t graph_id) const {
  198. if (!is_cache_path_valid_for_output) {
  199. GELOGW("Invalid cache path.");
  200. return SUCCESS;
  201. }
  202. string manifest_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  203. string manifest_file_path = RealPath(manifest_file.c_str());
  204. int ret;
  205. if (!manifest_file_path.empty()) {
  206. ret = remove(manifest_file_path.c_str());
  207. // If remove file failed, print the warning log
  208. if (ret != 0) {
  209. GELOGW("Clear cache [%s] failed.", manifest_file_path.c_str());
  210. }
  211. }
  212. string before_var_manager_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  213. string before_var_manager_file_path = RealPath(before_var_manager_file.c_str());
  214. if (!before_var_manager_file_path.empty()) {
  215. ret = remove(before_var_manager_file_path.c_str());
  216. if (ret != 0) {
  217. GELOGW("Clear cache [%s] failed.", before_var_manager_file_path.c_str());
  218. }
  219. }
  220. string after_var_manager_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  221. string after_var_manager_file_path = RealPath(after_var_manager_file.c_str());
  222. if (!after_var_manager_file_path.empty()) {
  223. ret = remove(after_var_manager_file_path.c_str());
  224. if (ret != 0) {
  225. GELOGW("Clear cache [%s] failed.", after_var_manager_file_path.c_str());
  226. }
  227. }
  228. string om_file = cache_path_ + to_string(graph_id) + kManifestSuffix;
  229. string om_file_path = RealPath(om_file.c_str());
  230. if (!om_file_path.empty()) {
  231. ret = remove(om_file_path.c_str());
  232. if (ret != 0) {
  233. GELOGW("Clear cache [%s] failed.", om_file_path.c_str());
  234. }
  235. }
  236. return SUCCESS;
  237. }
  238. Status ModelCacheHelper::RecoverVarManagerFromCache() const {
  239. string var_manager_cache =
  240. to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kAfterVarManagerSuffix;
  241. Json var_manager_json;
  242. if (LoadJsonFromFile(var_manager_cache, var_manager_json) != SUCCESS) {
  243. GELOGW("Fail to load json from cache file: %s", var_manager_cache.c_str());
  244. return FAILED;
  245. }
  246. Json mem_resource_json = move(var_manager_json[kMemResourceMap]);
  247. auto ret = RecoverMemResource(mem_resource_json);
  248. if (ret != SUCCESS) {
  249. GELOGW("Recover VarManager from cache failed.[MemResource]");
  250. return FAILED;
  251. }
  252. Json var_resource_json = move(var_manager_json[kVarResource]);
  253. ret = RecoverAllocatedGraphId(var_resource_json[kAllocatedGraphId]);
  254. if (ret != SUCCESS) {
  255. GELOGW("Recover VarManager from cache failed.[AllocatedGraphId]");
  256. return FAILED;
  257. }
  258. ret = RecoverChangedGraphId(var_resource_json[kChangedGraphId]);
  259. if (ret != SUCCESS) {
  260. GELOGW("Recover VarManager from cache failed.[ChangedGraphId]");
  261. return FAILED;
  262. }
  263. ret = RecoverBroadcastInfo(var_resource_json[kVarBroadcastInfo]);
  264. if (ret != SUCCESS) {
  265. GELOGW("Recover VarManager from cache failed.[VarBroadcastInfo]");
  266. return FAILED;
  267. }
  268. ret = RecoverVarAddrAndTensorDesc(var_resource_json[kVarAddrMgrMap]);
  269. if (ret != SUCCESS) {
  270. GELOGW("Recover VarManager from cache failed.[VarAddrMgrMap & CurVarTensorDesc]");
  271. return FAILED;
  272. }
  273. ret = RecoverTransRoads(var_resource_json[kTransRoads]);
  274. if (ret != SUCCESS) {
  275. GELOGW("Recover VarManager from cache failed.[TransRoads]");
  276. return FAILED;
  277. }
  278. GELOGI("Recover VarManager from cache[%s] success.", cache_path_.c_str());
  279. return SUCCESS;
  280. }
  281. Status ModelCacheHelper::GetNodesNeedRecompile(ComputeGraphPtr &graph, vector<NodePtr> &nodes) {
  282. std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  283. if (instance == nullptr || !instance->InitFlag()) {
  284. GELOGW("RecompileNodes failed.");
  285. return ge::GE_CLI_GE_NOT_INITIALIZED;
  286. }
  287. // Collect aicore ops for recompile
  288. for (auto &node : graph->GetDirectNode()) {
  289. if (node == nullptr) {
  290. continue;
  291. }
  292. auto op_desc = node->GetOpDesc();
  293. if (op_desc == nullptr) {
  294. continue;
  295. }
  296. // Get op kernel lib name
  297. string kernel_lib_name = op_desc->GetOpKernelLibName();
  298. if (kernel_lib_name.empty()) {
  299. // reset op kernel lib
  300. (void)instance->DNNEngineManagerObj().GetDNNEngineName(node);
  301. kernel_lib_name = op_desc->GetOpKernelLibName();
  302. if (kernel_lib_name.empty()) {
  303. GELOGW("Get node:%s, type:%s kernel lib failed.", node->GetName().c_str(), op_desc->GetType().c_str());
  304. continue;
  305. }
  306. }
  307. }
  308. return SUCCESS;
  309. }
  310. Status ModelCacheHelper::RecompileNodes(GeModelPtr &ge_model) {
  311. std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
  312. if (instance == nullptr || !instance->InitFlag()) {
  313. GELOGW("RecompileNodes failed.");
  314. return ge::GE_CLI_GE_NOT_INITIALIZED;
  315. }
  316. // Get aicore ops kernel info store.
  317. OpsKernelInfoStorePtr kernel_info = instance->OpsKernelManagerObj().GetOpsKernelInfoStore(kTbeKernelInfoStoreName);
  318. if (kernel_info == nullptr) {
  319. GELOGW("Get %s ops kernel info store failed", kTbeKernelInfoStoreName);
  320. return INTERNAL_ERROR;
  321. }
  322. auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
  323. vector<NodePtr> node_vec;
  324. auto ret = GetNodesNeedRecompile(compute_graph, node_vec);
  325. GE_CHK_BOOL_EXEC_WARN(ret == ge::SUCCESS, return ret, "Get nodes need recompiling failed");
  326. // Recompile aicore ops
  327. ret = kernel_info->CompileOp(node_vec);
  328. GE_CHK_BOOL_EXEC_WARN(ret == ge::SUCCESS, return ret, "Recompile op failed");
  329. const TBEKernelStore &tbekernel_store = ge_model->GetTBEKernelStore();
  330. TBEKernelStore tbe_kernel_store;
  331. for (const ge::NodePtr &n : compute_graph->GetDirectNode()) {
  332. auto node_op_desc = n->GetOpDesc();
  333. GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
  334. TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  335. if (tbe_kernel == nullptr) {
  336. // Load tbe kernel from tbe_kernel_store to op if op was not recompiled
  337. auto op_desc = n->GetOpDesc();
  338. tbekernel_store.LoadTBEKernelBinToOpDesc(op_desc);
  339. GELOGD("LoadOmModelFromCache: Load tbe kernel bin to op desc[%s].", op_desc->GetName().c_str());
  340. }
  341. tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
  342. GE_IF_BOOL_EXEC(tbe_kernel == nullptr, continue);
  343. // Refresh tbe kernel in tbe_kernel_store
  344. tbe_kernel_store.AddTBEKernel(tbe_kernel);
  345. GELOGD("Add tbe kernel bin %s", tbe_kernel->GetName().c_str());
  346. }
  347. GE_CHK_BOOL_EXEC_WARN(tbe_kernel_store.Build(), return FAILED, "TBE Kernels store build failed!");
  348. ge_model->SetTBEKernelStore(tbe_kernel_store);
  349. return SUCCESS;
  350. }
  351. Status ModelCacheHelper::GetNodesHash(map<std::string, size_t> &hash_map) const {
  352. vector<NodePtr> nodes;
  353. GraphUtils::TopologicalSortingByName(compute_graph_, nodes);
  354. ModelSerializeImp model_serialize_imp;
  355. std::hash<string> node_hash;
  356. for (const auto &node : nodes) {
  357. if (node == nullptr) {
  358. continue;
  359. }
  360. proto::OpDef op_def;
  361. bool is_framework_op = (node->GetType() == FRAMEWORKOP);
  362. int32_t framework_type = 0;
  363. if (is_framework_op) {
  364. AttrUtils::GetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, framework_type);
  365. AttrUtils::SetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, 0);
  366. }
  367. bool ret = model_serialize_imp.SerializeNode(node, &op_def, is_framework_op);
  368. op_def.set_id(0); // Id of op is not stable because of parallel parsing
  369. // Clear weights attr in constant.
  370. auto attr = op_def.mutable_attr();
  371. if (op_def.type() == CONSTANT || op_def.type() == CONSTANTOP) {
  372. attr->erase(ATTR_NAME_WEIGHTS);
  373. }
  374. if (is_framework_op) {
  375. AttrUtils::SetInt(node->GetOpDesc(), ge::ATTR_NAME_FRAMEWORK_FWK_TYPE, framework_type);
  376. }
  377. if (!ret) {
  378. GELOGW("Fail to serialize node[%s].", node->GetName().c_str());
  379. return INTERNAL_ERROR;
  380. }
  381. string prototxt;
  382. ret = google::protobuf::TextFormat::PrintToString(op_def, &prototxt);
  383. if (!ret) {
  384. GELOGW("Print OpDef to string failed.");
  385. hash_map.clear();
  386. return INTERNAL_ERROR;
  387. }
  388. size_t hash_code = node_hash(prototxt);
  389. hash_map[node->GetName()] = hash_code;
  390. }
  391. return SUCCESS;
  392. }
  393. Status ModelCacheHelper::GetComputeGraphHash(size_t &hash) const {
  394. proto::GraphDef graph_proto;
  395. ModelSerializeImp model_serialize_imp;
  396. // The name of compute graph may be generated randomly, so replace it temporarily.
  397. const string origin_name = compute_graph_->GetName();
  398. compute_graph_->SetName(kGraphName);
  399. bool serialize_ret = model_serialize_imp.SerializeGraph(compute_graph_, &graph_proto);
  400. graph_proto.clear_op();
  401. if (!serialize_ret) {
  402. GELOGW("Serialize graph failed.");
  403. hash = 0;
  404. return INTERNAL_ERROR;
  405. }
  406. compute_graph_->SetName(origin_name);
  407. // Generate proto text of GraphDef
  408. string prototxt;
  409. bool print_ret = google::protobuf::TextFormat::PrintToString(graph_proto, &prototxt);
  410. if (!print_ret) {
  411. GELOGW("Print GraphDef to string failed.");
  412. hash = 0;
  413. return INTERNAL_ERROR;
  414. }
  415. // Get the hash code of proto text
  416. std::hash<string> graph_hash;
  417. hash = graph_hash(prototxt);
  418. return SUCCESS;
  419. }
  420. Status ModelCacheHelper::SaveJsonToFile(const string &file_name, const Json &json) const {
  421. if (!is_cache_path_valid_for_output) {
  422. GELOGW("Invalid cache path.");
  423. return PARAM_INVALID;
  424. }
  425. // Check whether the manifest exists, if not, create it.
  426. string real_path = RealPath(cache_path_.c_str());
  427. if (real_path.empty()) {
  428. GELOGW("File path is invalid. please check cache path: %s", cache_path_.c_str());
  429. return FAILED;
  430. }
  431. const string path = cache_path_ + file_name;
  432. const int FILE_AUTHORITY = 0600;
  433. int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY);
  434. if (fd < 0) {
  435. GELOGW("Fail to open the file: %s.", path.c_str());
  436. return INTERNAL_ERROR;
  437. }
  438. if (close(fd) != 0) {
  439. GELOGW("Fail to close the file: %s.", path.c_str());
  440. return INTERNAL_ERROR;
  441. }
  442. // Write json into cache file
  443. ofstream ofs;
  444. ofs.open(path);
  445. if (!ofs.is_open()) {
  446. GELOGW("Fail to open the file: %s.", path.c_str());
  447. return INTERNAL_ERROR;
  448. }
  449. ofs << json << std::endl;
  450. ofs.close();
  451. return SUCCESS;
  452. }
  453. Status ModelCacheHelper::LoadJsonFromFile(const string &file_name, Json &json) const {
  454. if (!json.is_null()) {
  455. GELOGW("Input param json type should be null.");
  456. return PARAM_INVALID;
  457. }
  458. string real_path = RealPath(cache_path_.c_str());
  459. if (real_path.empty()) {
  460. GELOGW("File path is invalid. please check cache path: %s", cache_path_.c_str());
  461. return FAILED;
  462. }
  463. const string path = cache_path_ + file_name;
  464. if (!CheckInputPathValid(path)) {
  465. GELOGW("Invalid cache path for input:%s.", path.c_str());
  466. return FAILED;
  467. }
  468. string cache_real_path = RealPath(path.c_str());
  469. if (cache_real_path.empty()) {
  470. GELOGI("File[%s] is not found.", path.c_str());
  471. return FAILED;
  472. }
  473. // Read json from cache file
  474. ifstream ifs;
  475. ifs.open(path);
  476. if (!ifs.is_open()) {
  477. GELOGW("Fail to open the file: %s.", path.c_str());
  478. return INTERNAL_ERROR;
  479. }
  480. try {
  481. ifs >> json;
  482. } catch (nlohmann::detail::parse_error e) {
  483. GELOGW("Fail to load json from file, json throw an error:%s.", e.what());
  484. return INTERNAL_ERROR;
  485. } catch (nlohmann::detail::invalid_iterator e) {
  486. GELOGW("Fail to load json from file, json throw an error:%s.", e.what());
  487. return INTERNAL_ERROR;
  488. } catch (nlohmann::detail::type_error e) {
  489. GELOGW("Fail to load json from file, json throw an error:%s.", e.what());
  490. return INTERNAL_ERROR;
  491. } catch (nlohmann::detail::out_of_range e) {
  492. GELOGW("Fail to load json from file, json throw an error:%s.", e.what());
  493. return INTERNAL_ERROR;
  494. } catch (nlohmann::detail::other_error e) {
  495. GELOGW("Fail to load json from file, json throw an error:%s.", e.what());
  496. return INTERNAL_ERROR;
  497. }
  498. if (!json.is_object()) {
  499. GELOGW("Fail to load the json file: %s.", path.c_str());
  500. return INTERNAL_ERROR;
  501. }
  502. return SUCCESS;
  503. }
  504. Status ModelCacheHelper::SaveCacheInfoToCache() const {
  505. // Generate cache json
  506. // example: {"edgeNum":6,"nodeNum":7,"graphCache":134714827475991356}
  507. Json cache_json;
  508. try {
  509. cache_json[kNodeNum] = compute_graph_->GetDirectNodesSize();
  510. size_t edge_num = 0;
  511. for (const auto &node : compute_graph_->GetDirectNode()) {
  512. for (const auto &anchor : node->GetAllInAnchors()) {
  513. edge_num += anchor->GetPeerAnchors().size();
  514. }
  515. }
  516. cache_json[kEdgeNum] = edge_num;
  517. size_t hash = 0;
  518. auto ret = GetComputeGraphHash(hash);
  519. if (ret != SUCCESS) {
  520. GELOGW("Error occur when generate graph hash code.");
  521. return ret;
  522. }
  523. cache_json[kGraphHash] = hash;
  524. Json nodes_hash_json;
  525. ret = GetNodesHashMapJson(nodes_hash_json);
  526. if (ret != SUCCESS) {
  527. GELOGW("Error occur when generate nodes hash code.");
  528. return ret;
  529. }
  530. cache_json[kNodeHash] = nodes_hash_json;
  531. } catch (const std::exception &e) {
  532. GELOGW("Fail to generate cache info json. Error message: %s", e.what());
  533. return INTERNAL_ERROR;
  534. }
  535. string cache_manifest = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kManifestSuffix;
  536. auto ret = SaveJsonToFile(cache_manifest, cache_json);
  537. if (ret != SUCCESS) {
  538. GELOGW("Fail to save cache info to json file, path: %s.", cache_path_.c_str());
  539. return ret;
  540. }
  541. return SUCCESS;
  542. }
  543. Status ModelCacheHelper::GetCacheInfo(CacheInfo &cache_info) const {
  544. string cache_manifest = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kManifestSuffix;
  545. Json cache_json;
  546. if (LoadJsonFromFile(cache_manifest, cache_json) != SUCCESS) {
  547. GELOGW("Fail to load json from cache file: %s", cache_manifest.c_str());
  548. return INTERNAL_ERROR;
  549. }
  550. if (!cache_json.is_object()) {
  551. GELOGW("Manifest should be a json object");
  552. return INTERNAL_ERROR;
  553. }
  554. try {
  555. cache_info.node_num = cache_json[kNodeNum];
  556. cache_info.edge_num = cache_json[kEdgeNum];
  557. cache_info.graph_hash = cache_json[kGraphHash];
  558. Json nodes_hash_json = cache_json[kNodeHash];
  559. if (!(nodes_hash_json.is_null() || nodes_hash_json.is_array())) {
  560. GELOGW("Nodes hash in cache should be null or array.");
  561. return FAILED;
  562. }
  563. for (const auto &iter : nodes_hash_json) {
  564. cache_info.nodes_hash[iter[kName].get<std::string>()] = iter[kHash].get<size_t>();
  565. }
  566. } catch (const std::exception &e) {
  567. GELOGW("Fail to get info from json file. Error message: %s", e.what());
  568. return INTERNAL_ERROR;
  569. }
  570. return SUCCESS;
  571. }
  572. bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const {
  573. if (!(json.is_null() || json.is_array())) {
  574. GELOGW("Input param json type should be null or array.");
  575. return false;
  576. }
  577. // Compare allocated graph id info between json and VarManager
  578. std::unordered_map<std::string, uint32_t> allocated_graph_id;
  579. auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  580. if (ret != SUCCESS) {
  581. GELOGW("Fail to parse AllocatedGraphId from Json.");
  582. return false;
  583. }
  584. for (const auto &iter : allocated_graph_id) {
  585. uint32_t graph_id = 0;
  586. ret = VarManager::Instance(session_id_)->GetAllocatedGraphId(iter.first, graph_id);
  587. if (ret != SUCCESS) {
  588. GELOGW("Fail to find allocated graph id of var[%s].", iter.first.c_str());
  589. return false;
  590. }
  591. if (graph_id != iter.second) {
  592. GELOGW("The allocated graph id of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  593. return false;
  594. }
  595. }
  596. return true;
  597. }
  598. bool ModelCacheHelper::IsNodeHashSameAsCache(const map<std::string, size_t> &hash_map) const {
  599. map<std::string, size_t> cur_hash_map;
  600. GetNodesHash(cur_hash_map);
  601. if (hash_map.size() != cur_hash_map.size()) {
  602. GELOGI("The number of hash code is different from cache info.");
  603. return false;
  604. }
  605. for (const auto &iter : cur_hash_map) {
  606. if (hash_map.count(iter.first) == 0) {
  607. GELOGI("Node[%s] is not found in cache info.", iter.first.c_str());
  608. return false;
  609. }
  610. if (hash_map.at(iter.first) != iter.second) {
  611. GELOGI("The hash code of node[%s] is different from cache info.", iter.first.c_str());
  612. return false;
  613. }
  614. }
  615. return true;
  616. }
  617. bool ModelCacheHelper::IsMemResourceSameAsCache(Json &json) const {
  618. if (!(json.is_null() || json.is_array())) {
  619. GELOGW("Input param json type should be null or array.");
  620. return false;
  621. }
  622. // Compare var mem size info between json and VarManager
  623. std::map<rtMemType_t, int64_t> var_mem_size;
  624. auto ret = ParseMemResourceFromJson(json, var_mem_size);
  625. if (ret != SUCCESS) {
  626. GELOGW("Fail to parse MemResource from Json.");
  627. return false;
  628. }
  629. for (const auto &iter : var_mem_size) {
  630. int64_t mem_size = VarManager::Instance(session_id_)->GetVarMemSize(iter.first);
  631. if (mem_size != iter.second) {
  632. GELOGW("The var mem size of memory_type[%u] in cache is different from VarManager.", iter.first);
  633. return false;
  634. }
  635. }
  636. return true;
  637. }
  638. bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const {
  639. if (!(json.is_null() || json.is_array())) {
  640. GELOGW("Input param json type should be null or array.");
  641. return false;
  642. }
  643. // Compare variable changed graph id info between json and VarManager
  644. std::unordered_map<std::string, uint32_t> changed_graph_id;
  645. auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  646. if (ret != SUCCESS) {
  647. GELOGW("Fail to parse ChangedGraphId from Json.");
  648. return false;
  649. }
  650. for (const auto &iter : changed_graph_id) {
  651. uint32_t graph_id = 0;
  652. ret = VarManager::Instance(session_id_)->GetChangedGraphId(iter.first, graph_id);
  653. if (ret != SUCCESS) {
  654. GELOGW("Fail to find changed graph id of var[%s].", iter.first.c_str());
  655. return false;
  656. }
  657. if (graph_id != iter.second) {
  658. GELOGW("The changed graph id of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  659. return false;
  660. }
  661. }
  662. return true;
  663. }
  664. bool ModelCacheHelper::IsCurVarTensorDescSameAsCache(Json &json) const {
  665. if (!(json.is_null() || json.is_array())) {
  666. GELOGW("Input param json type should be null or array.");
  667. return false;
  668. }
  669. // Compare variable tensor desc info between json and VarManager
  670. std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc;
  671. auto ret = ParseCurVarTensorDescMapFromJson(json, cur_var_tensor_desc);
  672. if (ret != SUCCESS) {
  673. GELOGW("Fail to parse CurVarTensorDesc from Json.");
  674. return false;
  675. }
  676. for (const auto &iter : cur_var_tensor_desc) {
  677. GeTensorDesc tensor_desc;
  678. ret = VarManager::Instance(session_id_)->GetCurVarDesc(iter.first, tensor_desc);
  679. if (ret != SUCCESS) {
  680. GELOGW("Fail to find tensor desc of var[%s].", iter.first.c_str());
  681. return false;
  682. }
  683. uint32_t l_real_dim_cnt = 0;
  684. uint32_t r_real_dim_cnt = 0;
  685. TensorUtils::GetRealDimCnt(tensor_desc, l_real_dim_cnt);
  686. TensorUtils::GetRealDimCnt(iter.second, r_real_dim_cnt);
  687. if ((tensor_desc.GetDataType() != iter.second.GetDataType()) ||
  688. (tensor_desc.GetOriginDataType() != iter.second.GetOriginDataType()) ||
  689. (tensor_desc.GetFormat() != iter.second.GetFormat()) ||
  690. (tensor_desc.GetOriginFormat() != iter.second.GetOriginFormat()) ||
  691. (tensor_desc.GetShape().ToString() != iter.second.GetShape().ToString()) ||
  692. (tensor_desc.GetOriginShape().ToString() != iter.second.GetOriginShape().ToString()) ||
  693. (l_real_dim_cnt != r_real_dim_cnt)) {
  694. GELOGW("The var tensor desc of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  695. return false;
  696. }
  697. }
  698. return true;
  699. }
  700. bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const {
  701. if (!(json.is_null() || json.is_array())) {
  702. GELOGW("Input param json type should be null or array.");
  703. return false;
  704. }
  705. // Compare variable address info between json and VarManager
  706. std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  707. std::unordered_set<uint64_t> var_offset_set;
  708. auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  709. if (ret != SUCCESS) {
  710. GELOGW("Fail to parse VarAddrMgrMap from Json.");
  711. return false;
  712. }
  713. for (const auto &iter : var_addr_mgr_vector) {
  714. uint8_t *dev_ptr = nullptr;
  715. rtMemType_t memory_type;
  716. ret = VarManager::Instance(session_id_)->GetVarAddr(iter.first, iter.second.tensor_desc, &dev_ptr, memory_type);
  717. if (ret != SUCCESS) {
  718. GELOGW("Fail to find tensor desc of var[%s].", iter.first.c_str());
  719. return false;
  720. }
  721. // Compare memory type and logic address
  722. if (iter.second.memory_type != memory_type || iter.second.address != dev_ptr) {
  723. GELOGW("The VarAddrMgr of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  724. return false;
  725. }
  726. }
  727. return true;
  728. }
  729. bool ModelCacheHelper::IsBroadcastInfoSameAsCache(Json &json) const {
  730. if (!(json.is_null() || json.is_array())) {
  731. GELOGW("Input param json type should be null or array.");
  732. return false;
  733. }
  734. // Compare broadcast info between json and VarManager
  735. std::unordered_map<std::string, VarBroadCastInfo> var_broadcast_info;
  736. auto ret = ParseBroadcastInfoFromJson(json, var_broadcast_info);
  737. if (ret != SUCCESS) {
  738. GELOGW("Fail to parse BroadcastInfo from Json.");
  739. return false;
  740. }
  741. for (const auto &iter : var_broadcast_info) {
  742. VarBroadCastInfo broadcast_info;
  743. if (VarManager::Instance(session_id_)->GetBroadCastInfo(graph_id_, iter.first, broadcast_info) != SUCCESS) {
  744. GELOGW("Fail to find broadcast info of var[%s].", iter.first.c_str());
  745. return false;
  746. }
  747. if (iter.second.var_name != broadcast_info.var_name || iter.second.idx != broadcast_info.idx ||
  748. iter.second.input_size != broadcast_info.input_size ||
  749. iter.second.input_offset != broadcast_info.input_offset ||
  750. iter.second.output_size != broadcast_info.output_size ||
  751. iter.second.output_offset != broadcast_info.output_offset) {
  752. GELOGW("The BroadcastInfo of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  753. return false;
  754. }
  755. }
  756. return true;
  757. }
  758. bool ModelCacheHelper::IsTransRoadsSameAsCache(Json &json) const {
  759. if (!(json.is_null() || json.is_array())) {
  760. GELOGW("Input param json type should be null or array.");
  761. return false;
  762. }
  763. // Compare trans road between json and VarManager
  764. std::unordered_map<std::string, std::vector<TransNodeInfo>> trans_roads;
  765. auto ret = ParseTransRoadsFromJson(json, trans_roads);
  766. if (ret != SUCCESS) {
  767. GELOGW("Fail to parse TransRoads from Json.");
  768. return false;
  769. }
  770. for (const auto &iter : trans_roads) {
  771. VarTransRoad *trans_road;
  772. trans_road = VarManager::Instance(session_id_)->GetTransRoad(iter.first);
  773. if (trans_road == nullptr) {
  774. GELOGW("Fail to find trans road of var[%s].", iter.first.c_str());
  775. return false;
  776. }
  777. if (trans_road->size() != iter.second.size()) {
  778. GELOGW("The TransRoad of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  779. return false;
  780. }
  781. // Compare every trans node in trans road.
  782. for (size_t idx = 0; idx < trans_road->size(); idx += 1) {
  783. if (!(trans_road->at(idx).node_type == iter.second.at(idx).node_type &&
  784. trans_road->at(idx).input == iter.second.at(idx).input &&
  785. trans_road->at(idx).output == iter.second.at(idx).output)) {
  786. GELOGW("The TransRoad of variable[%s] in cache is different from VarManager.", iter.first.c_str());
  787. return false;
  788. }
  789. }
  790. }
  791. return true;
  792. }
  793. bool ModelCacheHelper::IsVarManagerParamSameAsCache(Json &json) const {
  794. if (!json.is_object()) {
  795. GELOGW("Input param json type should be object.");
  796. return false;
  797. }
  798. try {
  799. if (json[kSessionId].get<uint64_t>() != session_id_) {
  800. GELOGW("Check VarManager cache failed.[sessionId]");
  801. return false;
  802. }
  803. if (json[kDeviceId].get<uint32_t>() != VarManager::Instance(session_id_)->DeviceId()) {
  804. GELOGW("Check VarManager cache failed.[deviceId]");
  805. return false;
  806. }
  807. if (json[kJobId].get<uint64_t>() != VarManager::Instance(session_id_)->JobId()) {
  808. GELOGW("Check VarManager cache failed.[jobId]");
  809. return false;
  810. }
  811. if (json[kGraphMemMaxSize].get<size_t>() != VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()) {
  812. GELOGW("Check VarManager cache failed.[graphMemMaxSize]");
  813. return false;
  814. }
  815. if (json[kVarMemMaxSize].get<size_t>() != VarManager::Instance(session_id_)->GetVarMemMaxSize()) {
  816. GELOGW("Check VarManager cache failed.[varMemMaxSize]");
  817. return false;
  818. }
  819. if (json[kVarMemLogicBase].get<size_t>() != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
  820. GELOGW("Check VarManager cache failed.[varMemLogicBase]");
  821. return false;
  822. }
  823. if (json[kUseMaxMemSize].get<size_t>() != VarManager::Instance(session_id_)->GetUseMaxMemorySize()) {
  824. GELOGW("Check VarManager cache failed.[useMaxMemSize]");
  825. return false;
  826. }
  827. } catch (const std::exception &e) {
  828. GELOGW("Fail to check VarManager json. Error message: %s", e.what());
  829. return false;
  830. }
  831. return true;
  832. }
  833. bool ModelCacheHelper::IsVarManagerSameAsCache(Json &json) const {
  834. if (!json.is_object()) {
  835. GELOGW("Input param json type should be object.");
  836. return false;
  837. }
  838. try {
  839. if (!IsVarManagerParamSameAsCache(json)) {
  840. GELOGW("Check VarManager cache failed.[Param]");
  841. return false;
  842. }
  843. Json mem_resource_json = move(json[kMemResourceMap]);
  844. auto ret = IsMemResourceSameAsCache(mem_resource_json);
  845. if (!ret) {
  846. GELOGW("Check VarManager cache failed.[MemResource]");
  847. return false;
  848. }
  849. Json var_resource_json = move(json[kVarResource]);
  850. ret = IsAllocatedGraphIdSameAsCache(var_resource_json[kAllocatedGraphId]);
  851. if (!ret) {
  852. GELOGW("Check VarManager cache failed.[AllocatedGraphId]");
  853. return false;
  854. }
  855. ret = IsChangedGraphIdSameAsCache(var_resource_json[kChangedGraphId]);
  856. if (!ret) {
  857. GELOGW("Check VarManager cache failed.[ChangedGraphId]");
  858. return false;
  859. }
  860. ret = IsBroadcastInfoSameAsCache(var_resource_json[kVarBroadcastInfo]);
  861. if (!ret) {
  862. GELOGW("Check VarManager cache failed.[VarBroadcastInfo]");
  863. return false;
  864. }
  865. ret = IsCurVarTensorDescSameAsCache(var_resource_json[kCurVarTensorDescMap]);
  866. if (!ret) {
  867. GELOGW("Check VarManager cache failed.[CurVarTensorDesc]");
  868. return false;
  869. }
  870. ret = IsVarAddrMgrMapSameAsCache(var_resource_json[kVarAddrMgrMap]);
  871. if (!ret) {
  872. GELOGW("Check VarManager cache failed.[VarAddrMgrMap]");
  873. return false;
  874. }
  875. ret = IsTransRoadsSameAsCache(var_resource_json[kTransRoads]);
  876. if (!ret) {
  877. GELOGW("Check VarManager cache failed.[TransRoads]");
  878. return false;
  879. }
  880. } catch (const std::exception &e) {
  881. GELOGW("Fail to check VarManager json. Error message: %s", e.what());
  882. return false;
  883. }
  884. return true;
  885. }
  886. Status ModelCacheHelper::RecoverMemResource(const Json &json) const {
  887. if (!(json.is_null() || json.is_array())) {
  888. GELOGW("Input param json type should be null or array.");
  889. return PARAM_INVALID;
  890. }
  891. std::map<rtMemType_t, int64_t> var_mem_size;
  892. auto ret = ParseMemResourceFromJson(json, var_mem_size);
  893. if (ret != SUCCESS) {
  894. GELOGW("Fail to parse MemResource from Json.");
  895. return ret;
  896. }
  897. for (const auto &iter : var_mem_size) {
  898. ret = VarManager::Instance(session_id_)->UpdateVarMemSize(iter.first, iter.second);
  899. if (ret != SUCCESS) {
  900. GELOGW("Fail to recover var mem size.");
  901. return ret;
  902. }
  903. }
  904. return SUCCESS;
  905. }
  906. Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const {
  907. if (!(json.is_null() || json.is_array())) {
  908. GELOGW("Input param json type should be null or array.");
  909. return PARAM_INVALID;
  910. }
  911. std::unordered_map<std::string, uint32_t> allocated_graph_id;
  912. auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id);
  913. if (ret != SUCCESS) {
  914. GELOGW("Fail to parse AllocatedGraphId from Json.");
  915. return ret;
  916. }
  917. for (const auto &iter : allocated_graph_id) {
  918. ret = VarManager::Instance(session_id_)->SetAllocatedGraphId(iter.first, iter.second);
  919. if (ret != SUCCESS) {
  920. GELOGW("Fail to recover allocated graph id.");
  921. return ret;
  922. }
  923. }
  924. return SUCCESS;
  925. }
  926. Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const {
  927. if (!(json.is_null() || json.is_array())) {
  928. GELOGW("Input param json type should be null or array.");
  929. return PARAM_INVALID;
  930. }
  931. std::unordered_map<std::string, uint32_t> changed_graph_id;
  932. auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id);
  933. if (ret != SUCCESS) {
  934. GELOGW("Fail to parse AllocatedGraphId from Json.");
  935. return ret;
  936. }
  937. for (const auto &iter : changed_graph_id) {
  938. ret = VarManager::Instance(session_id_)->SetChangedGraphId(iter.first, iter.second);
  939. if (ret != SUCCESS) {
  940. GELOGW("Fail to recover changed graph id.");
  941. return ret;
  942. }
  943. }
  944. return SUCCESS;
  945. }
  946. Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
  947. if (!(json.is_null() || json.is_array())) {
  948. GELOGW("Input param json type should be null or array.");
  949. return PARAM_INVALID;
  950. }
  951. std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector;
  952. std::unordered_set<uint64_t> var_offset_set;
  953. auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set);
  954. if (ret != SUCCESS) {
  955. GELOGW("Fail to parse VarAddrMgrMap from Json.");
  956. return ret;
  957. }
  958. for (const auto &iter : var_addr_mgr_vector) {
  959. const VarAddrMgr &tensor_addr_mgr = iter.second;
  960. const bool var_exist = VarManager::Instance(session_id_)->IsVarExist(iter.first, tensor_addr_mgr.tensor_desc);
  961. // SaveVarVddr if var does not exist, the logic address will be recorded by VarManager
  962. if (!var_exist) {
  963. auto logic_address = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(tensor_addr_mgr.address));
  964. auto offset = (tensor_addr_mgr.offset);
  965. // Check logic address and offset
  966. if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
  967. GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
  968. offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
  969. return PARAM_INVALID;
  970. }
  971. // Offset is needed by SaveVarVddr instead of logic address
  972. ret =
  973. VarManager::Instance(session_id_)
  974. ->SaveVarAddr(iter.first, tensor_addr_mgr.tensor_desc,
  975. reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(offset)), tensor_addr_mgr.memory_type);
  976. if (ret != SUCCESS) {
  977. GELOGW("Fail to recover VarAddr or TensorDesc of var[%s].", iter.first.c_str());
  978. return ret;
  979. }
  980. }
  981. // SetVarAddr to update cur_var_tensor_desc_map_
  982. ret = VarManager::Instance(session_id_)
  983. ->SetVarAddr(iter.first, tensor_addr_mgr.tensor_desc, tensor_addr_mgr.address, tensor_addr_mgr.memory_type);
  984. if (ret != SUCCESS) {
  985. GELOGW("Fail to recover VarAddr or TensorDesc desc of var[%s].", iter.first.c_str());
  986. return ret;
  987. }
  988. }
  989. return SUCCESS;
  990. }
  991. Status ModelCacheHelper::RecoverBroadcastInfo(const Json &json) const {
  992. if (!(json.is_null() || json.is_array())) {
  993. GELOGW("Input param json type should be null or array.");
  994. return PARAM_INVALID;
  995. }
  996. std::unordered_map<std::string, VarBroadCastInfo> var_broadcast_info;
  997. auto ret = ParseBroadcastInfoFromJson(json, var_broadcast_info);
  998. if (ret != SUCCESS) {
  999. GELOGW("Fail to parse BroadcastInfo from Json.");
  1000. return ret;
  1001. }
  1002. for (const auto &iter : var_broadcast_info) {
  1003. VarBroadCastInfo broadcast_info;
  1004. ret = VarManager::Instance(session_id_)->SaveBroadCastInfo(graph_id_, iter.second);
  1005. if (ret != SUCCESS) {
  1006. GELOGW("Fail to recover broadcast info of var[%s].", iter.first.c_str());
  1007. return ret;
  1008. }
  1009. }
  1010. return SUCCESS;
  1011. }
  1012. Status ModelCacheHelper::RecoverTransRoads(const Json &json) const {
  1013. if (!(json.is_null() || json.is_array())) {
  1014. GELOGW("Input param json type should be null or array.");
  1015. return PARAM_INVALID;
  1016. }
  1017. std::unordered_map<std::string, std::vector<TransNodeInfo>> trans_roads;
  1018. auto ret = ParseTransRoadsFromJson(json, trans_roads);
  1019. if (ret != SUCCESS) {
  1020. GELOGW("Fail to parse TransRoads from Json.");
  1021. return ret;
  1022. }
  1023. for (const auto &iter : trans_roads) {
  1024. ret = VarManager::Instance(session_id_)->SetTransRoad(iter.first, iter.second);
  1025. if (ret != SUCCESS) {
  1026. GELOGW("Fail to find trans road of var[%s].", iter.first.c_str());
  1027. return ret;
  1028. }
  1029. }
  1030. return SUCCESS;
  1031. }
  1032. Status ModelCacheHelper::TensorDescToJson(const GeTensorDesc &ge_tensor_desc, Json &json) {
  1033. if (!(json.is_null() || json.is_object())) {
  1034. GELOGW("Input param json type should be null or object.");
  1035. return PARAM_INVALID;
  1036. }
  1037. try {
  1038. json[kDataType] = static_cast<int>(ge_tensor_desc.GetDataType());
  1039. json[kOriginDataType] = static_cast<int>(ge_tensor_desc.GetOriginDataType());
  1040. json[kLayout] = static_cast<int>(ge_tensor_desc.GetFormat());
  1041. json[kOriginLayout] = static_cast<int>(ge_tensor_desc.GetOriginFormat());
  1042. json[kShape] = ge_tensor_desc.GetShape().GetDims();
  1043. json[kOriginShape] = ge_tensor_desc.GetOriginShape().GetDims();
  1044. uint32_t real_dim_cnt = 0;
  1045. (void)TensorUtils::GetRealDimCnt(ge_tensor_desc, real_dim_cnt); // [No need to check value]
  1046. json[kRealDimCnt] = real_dim_cnt;
  1047. } catch (const std::exception &e) {
  1048. GELOGW("Fail to trans GeTensorDesc to json. Error message: %s", e.what());
  1049. return INTERNAL_ERROR;
  1050. }
  1051. return SUCCESS;
  1052. }
  1053. Status ModelCacheHelper::JsonToTensorDesc(const Json &json, ge::GeTensorDesc &ge_tensor_desc) {
  1054. if (!json.is_object()) {
  1055. GELOGW("Input param json type should be object.");
  1056. return PARAM_INVALID;
  1057. }
  1058. try {
  1059. ge_tensor_desc.SetDataType(static_cast<DataType>(json[kDataType].get<int>()));
  1060. ge_tensor_desc.SetOriginDataType(static_cast<DataType>(json[kOriginDataType].get<int>()));
  1061. ge_tensor_desc.SetFormat(static_cast<Format>(json[kLayout].get<int>()));
  1062. ge_tensor_desc.SetOriginFormat(static_cast<Format>(json[kOriginLayout].get<int>()));
  1063. GeShape shape(json[kShape].get<std::vector<int64_t>>());
  1064. ge_tensor_desc.SetShape(shape);
  1065. GeShape origin_shape(json[kOriginShape].get<std::vector<int64_t>>());
  1066. ge_tensor_desc.SetOriginShape(origin_shape);
  1067. auto real_dim_cnt = json[kRealDimCnt].get<uint32_t>();
  1068. (void)TensorUtils::SetRealDimCnt(ge_tensor_desc, real_dim_cnt); // [No need to check value]
  1069. } catch (const std::exception &e) {
  1070. GELOGW("Fail to trans Json to GeTensorDesc. Error message: %s", e.what());
  1071. return INTERNAL_ERROR;
  1072. }
  1073. return SUCCESS;
  1074. }
  1075. Status ModelCacheHelper::GetNodesHashMapJson(Json &json) const {
  1076. if (!(json.is_null() || json.is_array())) {
  1077. GELOGW("Input param json type should be null or array.");
  1078. return PARAM_INVALID;
  1079. }
  1080. map<std::string, size_t> hash_map;
  1081. GetNodesHash(hash_map);
  1082. for (const auto &iter : hash_map) {
  1083. Json node_hash_json;
  1084. try {
  1085. node_hash_json[kName] = iter.first;
  1086. node_hash_json[kHash] = iter.second;
  1087. json.emplace_back(move(node_hash_json));
  1088. } catch (const std::exception &e) {
  1089. GELOGW("Fail to trans node cache to json. Error message: %s", e.what());
  1090. return INTERNAL_ERROR;
  1091. }
  1092. }
  1093. return SUCCESS;
  1094. }
  1095. Status ModelCacheHelper::GetMemResourceMap(Json &json) const {
  1096. if (!(json.is_null() || json.is_array())) {
  1097. GELOGW("Input param json type should be null or array.");
  1098. return PARAM_INVALID;
  1099. }
  1100. const auto total_size = VarManager::Instance(session_id_)->GetVarMemMaxSize();
  1101. const auto var_mem_size = VarManager::Instance(session_id_)->GetVarMemSize(RT_MEMORY_HBM);
  1102. Json mem_resource_json;
  1103. try {
  1104. mem_resource_json[kMemType] = RT_MEMORY_HBM;
  1105. mem_resource_json[kTotalSize] = total_size;
  1106. mem_resource_json[kVarMemSize] = var_mem_size;
  1107. json.emplace_back(move(mem_resource_json));
  1108. } catch (const std::exception &e) {
  1109. GELOGW("Fail to trans MemResourceMap to json. Error message: %s", e.what());
  1110. return INTERNAL_ERROR;
  1111. }
  1112. return SUCCESS;
  1113. }
  1114. Status ModelCacheHelper::GetVarAddrMgrMapJson(Json &json) const {
  1115. if (!(json.is_null() || json.is_array())) {
  1116. GELOGW("Input param json type should be null or array.");
  1117. return PARAM_INVALID;
  1118. }
  1119. std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map;
  1120. VarManager::Instance(session_id_)->GetAllVarAddrMgr(var_addr_mgr_map);
  1121. try {
  1122. for (const auto &iter : var_addr_mgr_map) {
  1123. Json var_addr_json;
  1124. string name;
  1125. GetVarNameFromVarKey(iter.first, iter.second.tensor_desc, name);
  1126. var_addr_json[kName] = name;
  1127. var_addr_json[kAddress] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(iter.second.address));
  1128. var_addr_json[kMemoryType] = iter.second.memory_type;
  1129. var_addr_json[kOffset] = iter.second.offset;
  1130. // Copy tensor desc to json.
  1131. Json tensor_desc_json;
  1132. auto ret = TensorDescToJson(iter.second.tensor_desc, tensor_desc_json);
  1133. if (ret != SUCCESS) {
  1134. GELOGW("Fail to trans tensor desc to json.");
  1135. return INTERNAL_ERROR;
  1136. }
  1137. var_addr_json[kTensorDesc] = move(tensor_desc_json);
  1138. json.emplace_back(move(var_addr_json));
  1139. }
  1140. } catch (const std::exception &e) {
  1141. GELOGW("Fail to trans VarAddrMgrMap to json. Error message: %s", e.what());
  1142. return INTERNAL_ERROR;
  1143. }
  1144. return SUCCESS;
  1145. }
  1146. Status ModelCacheHelper::GetCurVarTensorDescMapJson(Json &json) const {
  1147. if (!(json.is_null() || json.is_array())) {
  1148. GELOGW("Input param json type should be null or array.");
  1149. return PARAM_INVALID;
  1150. }
  1151. try {
  1152. for (const auto &name : var_names_) {
  1153. Json cur_tensor_desc_json;
  1154. GeTensorDesc tensor_desc;
  1155. auto ret = VarManager::Instance(session_id_)->GetCurVarDesc(name, tensor_desc);
  1156. if (ret != SUCCESS) {
  1157. GELOGI("Get variable[%s] current tensor desc failed. It will be skipped.", name.c_str());
  1158. continue;
  1159. }
  1160. cur_tensor_desc_json[kName] = name;
  1161. Json tensor_desc_json;
  1162. ret = TensorDescToJson(tensor_desc, tensor_desc_json);
  1163. if (ret != SUCCESS) {
  1164. GELOGW("Fail to trans tensor desc to json.");
  1165. return INTERNAL_ERROR;
  1166. }
  1167. cur_tensor_desc_json[kTensorDesc] = move(tensor_desc_json);
  1168. json.emplace_back(move(cur_tensor_desc_json));
  1169. }
  1170. } catch (const std::exception &e) {
  1171. GELOGW("Fail to trans CurVarTensorDescMap to json. Error message: %s", e.what());
  1172. return INTERNAL_ERROR;
  1173. }
  1174. return SUCCESS;
  1175. }
  1176. Status ModelCacheHelper::GetTransRoadsJson(Json &json) const {
  1177. if (!(json.is_null() || json.is_array())) {
  1178. GELOGW("Input param json type should be null or array.");
  1179. return PARAM_INVALID;
  1180. }
  1181. try {
  1182. for (const auto &name : var_names_) {
  1183. auto trans_road = VarManager::Instance(session_id_)->GetTransRoad(name);
  1184. if (trans_road == nullptr) {
  1185. continue;
  1186. }
  1187. // Json object, variable name and trans road
  1188. Json trans_road_map_json;
  1189. trans_road_map_json[kName] = name;
  1190. Json trans_road_json;
  1191. Status ret;
  1192. // Add nodes' info to json
  1193. for (const auto &trans_node_info : *trans_road) {
  1194. Json trans_node_info_json;
  1195. trans_node_info_json[kNodeType] = trans_node_info.node_type;
  1196. Json input_tensor_desc_json;
  1197. ret = TensorDescToJson(trans_node_info.input, input_tensor_desc_json);
  1198. if (ret != SUCCESS) {
  1199. GELOGW("Fail to trans tensor desc to json.");
  1200. return INTERNAL_ERROR;
  1201. }
  1202. trans_node_info_json[kInputTensorDesc] = move(input_tensor_desc_json);
  1203. Json output_tensor_desc_json;
  1204. ret = TensorDescToJson(trans_node_info.output, output_tensor_desc_json);
  1205. if (ret != SUCCESS) {
  1206. GELOGW("Fail to trans tensor desc to json.");
  1207. return INTERNAL_ERROR;
  1208. }
  1209. trans_node_info_json[kOutputTensorDesc] = move(output_tensor_desc_json);
  1210. trans_road_json.emplace_back(move(trans_node_info_json));
  1211. }
  1212. trans_road_map_json[kTransRoad] = move(trans_road_json);
  1213. json.emplace_back(move(trans_road_map_json));
  1214. }
  1215. } catch (const std::exception &e) {
  1216. GELOGW("Fail to trans VarToTransRoad to json. Error message: %s", e.what());
  1217. return INTERNAL_ERROR;
  1218. }
  1219. return SUCCESS;
  1220. }
  1221. Status ModelCacheHelper::GetChangedGraphIdJson(Json &json) const {
  1222. if (!(json.is_null() || json.is_array())) {
  1223. GELOGW("Input param json type should be null or array.");
  1224. return PARAM_INVALID;
  1225. }
  1226. for (const auto &name : var_names_) {
  1227. uint32_t changed_graph_id = 0;
  1228. Status ret = VarManager::Instance(session_id_)->GetChangedGraphId(name, changed_graph_id);
  1229. if (ret != SUCCESS) {
  1230. continue;
  1231. }
  1232. Json name_and_changed_graph_id;
  1233. try {
  1234. name_and_changed_graph_id[kName] = name;
  1235. name_and_changed_graph_id[kGraphId] = changed_graph_id;
  1236. json.emplace_back(move(name_and_changed_graph_id));
  1237. } catch (const std::exception &e) {
  1238. GELOGW("Fail to trans ChangedGraphId to json. Error message: %s", e.what());
  1239. return INTERNAL_ERROR;
  1240. }
  1241. }
  1242. return SUCCESS;
  1243. }
  1244. Status ModelCacheHelper::GetAllocatedGraphIdJson(Json &json) const {
  1245. if (!(json.is_null() || json.is_array())) {
  1246. GELOGW("Input param json type should be null or array.");
  1247. return PARAM_INVALID;
  1248. }
  1249. for (const auto &name : var_names_) {
  1250. uint32_t allocated_graph_id = 0;
  1251. Status ret = VarManager::Instance(session_id_)->GetAllocatedGraphId(name, allocated_graph_id);
  1252. if (ret != SUCCESS) {
  1253. continue;
  1254. }
  1255. Json name_and_allocated_graph_id;
  1256. try {
  1257. name_and_allocated_graph_id[kName] = name;
  1258. name_and_allocated_graph_id[kGraphId] = allocated_graph_id;
  1259. json.emplace_back(move(name_and_allocated_graph_id));
  1260. } catch (const std::exception &e) {
  1261. GELOGW("Fail to trans AllocatedGraphId to json. Error message: %s", e.what());
  1262. return INTERNAL_ERROR;
  1263. }
  1264. }
  1265. return SUCCESS;
  1266. }
  1267. Status ModelCacheHelper::GetBroadcastInfoJson(Json &json) const {
  1268. if (!(json.is_null() || json.is_array())) {
  1269. GELOGW("Input param json type should be null or array.");
  1270. return PARAM_INVALID;
  1271. }
  1272. for (const auto &name : var_names_) {
  1273. VarBroadCastInfo var_broadcast_info;
  1274. Status ret = VarManager::Instance(session_id_)->GetBroadCastInfo(graph_id_, name, var_broadcast_info);
  1275. if (ret != SUCCESS) {
  1276. continue;
  1277. }
  1278. Json var_broadcast_info_json;
  1279. try {
  1280. var_broadcast_info_json[kName] = name;
  1281. var_broadcast_info_json[kBroadcastName] = var_broadcast_info.broadcast_name;
  1282. var_broadcast_info_json[kIdx] = var_broadcast_info.idx;
  1283. var_broadcast_info_json[kInputOffset] = var_broadcast_info.input_offset;
  1284. var_broadcast_info_json[kInputSize] = var_broadcast_info.input_size;
  1285. var_broadcast_info_json[kOutputOffset] = var_broadcast_info.output_offset;
  1286. var_broadcast_info_json[kOutputSize] = var_broadcast_info.output_size;
  1287. json.emplace_back(move(var_broadcast_info_json));
  1288. } catch (const std::exception &e) {
  1289. GELOGW("Fail to trans VarBroadcastInfo to json. Error message: %s", e.what());
  1290. return INTERNAL_ERROR;
  1291. }
  1292. }
  1293. return SUCCESS;
  1294. }
  1295. Status ModelCacheHelper::GetVarResourceJson(Json &json) const {
  1296. if (!(json.is_null() || json.is_object())) {
  1297. GELOGW("Input param json type should be null or object.");
  1298. return PARAM_INVALID;
  1299. }
  1300. Json var_addr_mgr_map_json;
  1301. Status ret = GetVarAddrMgrMapJson(var_addr_mgr_map_json);
  1302. if (ret != SUCCESS) {
  1303. GELOGW("GetVarAddrMgrMapJson failed.");
  1304. return INTERNAL_ERROR;
  1305. }
  1306. Json cur_var_tensor_desc_map_json;
  1307. ret = GetCurVarTensorDescMapJson(cur_var_tensor_desc_map_json);
  1308. if (ret != SUCCESS) {
  1309. GELOGW("GetCurVarTensorDescMapJson failed.");
  1310. return INTERNAL_ERROR;
  1311. }
  1312. Json trans_roads_json;
  1313. ret = GetTransRoadsJson(trans_roads_json);
  1314. if (ret != SUCCESS) {
  1315. GELOGW("GetTransRoadsJson failed.");
  1316. return INTERNAL_ERROR;
  1317. }
  1318. Json changed_graph_id_json;
  1319. ret = GetChangedGraphIdJson(changed_graph_id_json);
  1320. if (ret != SUCCESS) {
  1321. GELOGW("GetChangedGraphIdJson failed.");
  1322. return INTERNAL_ERROR;
  1323. }
  1324. Json allocated_graph_id_json;
  1325. ret = GetAllocatedGraphIdJson(allocated_graph_id_json);
  1326. if (ret != SUCCESS) {
  1327. GELOGW("GetAllocatedGraphIdJson failed.");
  1328. return INTERNAL_ERROR;
  1329. }
  1330. Json var_broadcast_info_json;
  1331. ret = GetBroadcastInfoJson(var_broadcast_info_json);
  1332. if (ret != SUCCESS) {
  1333. GELOGW("GetBroadcastInfoJson failed.");
  1334. return INTERNAL_ERROR;
  1335. }
  1336. try {
  1337. json[kVarAddrMgrMap] = move(var_addr_mgr_map_json);
  1338. json[kCurVarTensorDescMap] = move(cur_var_tensor_desc_map_json);
  1339. json[kTransRoads] = move(trans_roads_json);
  1340. json[kChangedGraphId] = move(changed_graph_id_json);
  1341. json[kAllocatedGraphId] = move(allocated_graph_id_json);
  1342. json[kVarBroadcastInfo] = move(var_broadcast_info_json);
  1343. } catch (const exception &e) {
  1344. GELOGW("Fail to generate VarResource json. Error message: %s", e.what());
  1345. return INTERNAL_ERROR;
  1346. }
  1347. return SUCCESS;
  1348. }
  1349. Status ModelCacheHelper::GetVarManagerJson(Json &json) const {
  1350. if (!(json.is_null() || json.is_object())) {
  1351. GELOGW("Input param json type should be null or object.");
  1352. return PARAM_INVALID;
  1353. }
  1354. Json mem_resource_map_json;
  1355. auto ret = GetMemResourceMap(mem_resource_map_json);
  1356. if (ret != SUCCESS) {
  1357. GELOGW("GetMemResourceMap failed.");
  1358. return INTERNAL_ERROR;
  1359. }
  1360. Json var_resource_json;
  1361. ret = GetVarResourceJson(var_resource_json);
  1362. if (ret != SUCCESS) {
  1363. GELOGW("GetVarResourceJson failed.");
  1364. return INTERNAL_ERROR;
  1365. }
  1366. try {
  1367. json[kSessionId] = session_id_;
  1368. json[kDeviceId] = VarManager::Instance(session_id_)->DeviceId();
  1369. json[kJobId] = VarManager::Instance(session_id_)->JobId();
  1370. json[kGraphMemMaxSize] = VarManager::Instance(session_id_)->GetGraphMemoryMaxSize();
  1371. json[kVarMemMaxSize] = VarManager::Instance(session_id_)->GetVarMemMaxSize();
  1372. json[kVarMemLogicBase] = VarManager::Instance(session_id_)->GetVarMemLogicBase();
  1373. json[kUseMaxMemSize] = VarManager::Instance(session_id_)->GetUseMaxMemorySize();
  1374. json[kMemResourceMap] = move(mem_resource_map_json);
  1375. json[kVarResource] = move(var_resource_json);
  1376. } catch (const exception &e) {
  1377. GELOGW("Fail to generate VarManager json. Error message: %s", e.what());
  1378. return INTERNAL_ERROR;
  1379. }
  1380. return SUCCESS;
  1381. }
  1382. Status ModelCacheHelper::SaveVarManagerToCache(bool before_build) const {
  1383. if (!is_cache_path_valid_for_output) {
  1384. GELOGW("Invalid cache path.");
  1385. return FAILED;
  1386. }
  1387. Json var_manager_json;
  1388. auto ret = GetVarManagerJson(var_manager_json);
  1389. if (ret != SUCCESS) {
  1390. GELOGW("Fail to generate VarManager json.");
  1391. return FAILED;
  1392. }
  1393. string var_manager_path = to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) +
  1394. (before_build ? kBeforeVarManagerSuffix : kAfterVarManagerSuffix);
  1395. ret = SaveJsonToFile(var_manager_path, var_manager_json);
  1396. if (ret != SUCCESS) {
  1397. GELOGW("Fail to save VarManager info to json file, path: %s.", cache_path_.c_str());
  1398. return ret;
  1399. }
  1400. return SUCCESS;
  1401. }
  1402. Status ModelCacheHelper::SaveOmModelToCache(const GeModelPtr &ge_model) const {
  1403. if (!is_cache_path_valid_for_output) {
  1404. GELOGW("Invalid cache path.");
  1405. return FAILED;
  1406. }
  1407. string om_path = RealPath(cache_path_.c_str());
  1408. if (om_path.empty()) {
  1409. GELOGW("file path is invalid. please check path om: %s", cache_path_.c_str());
  1410. return FAILED;
  1411. }
  1412. string cache_om_path = cache_path_;
  1413. cache_om_path += (to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kOmSuffix);
  1414. GELOGI("SaveOmModelToCache: start to save om model : %s", cache_om_path.c_str());
  1415. ModelHelper model_helper;
  1416. SaveParam save_param;
  1417. ModelBufferData model;
  1418. Status ret = model_helper.SaveToOmModel(ge_model, save_param, cache_om_path, model);
  1419. if (ret != SUCCESS) {
  1420. GELOGW("SaveOmModelToCache: save mode failed. ret = %u", ret);
  1421. return ret;
  1422. }
  1423. return SUCCESS;
  1424. }
  1425. Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource) {
  1426. if (!(json.is_array() || json.is_null())) {
  1427. GELOGW("Input param json type should be null or array.");
  1428. return PARAM_INVALID;
  1429. }
  1430. mem_resource.clear();
  1431. for (const Json &mem_resource_json : json) {
  1432. MemResource var_addr_mgr;
  1433. try {
  1434. rtMemType_t mem_type = mem_resource_json[kMemType].get<rtMemType_t>();
  1435. uint64_t var_mem_size = mem_resource_json[kVarMemSize].get<int64_t>();
  1436. mem_resource[mem_type] = var_mem_size;
  1437. } catch (const exception &e) {
  1438. GELOGW("Fail to trans Json to MemResource. Error message: %s", e.what());
  1439. return INTERNAL_ERROR;
  1440. }
  1441. }
  1442. return SUCCESS;
  1443. }
  1444. Status ModelCacheHelper::ParseVarAddrMgrMapFromJson(
  1445. const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector,
  1446. std::unordered_set<uint64_t> &var_offset_set) {
  1447. if (!(json.is_array() || json.is_null())) {
  1448. GELOGW("Input param json type should be null or array.");
  1449. return PARAM_INVALID;
  1450. }
  1451. var_addr_mgr_vector.clear();
  1452. var_offset_set.clear();
  1453. for (const Json &var_addr_json : json) {
  1454. VarAddrMgr var_addr_mgr;
  1455. try {
  1456. auto logic_address = var_addr_json[kAddress].get<uint64_t>();
  1457. auto address = reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(logic_address));
  1458. var_addr_mgr.address = address;
  1459. var_addr_mgr.offset = var_addr_json[kOffset].get<uint64_t>();
  1460. var_addr_mgr.memory_type = var_addr_json[kMemoryType].get<rtMemType_t>();
  1461. auto ret = JsonToTensorDesc(var_addr_json[kTensorDesc], var_addr_mgr.tensor_desc);
  1462. if (ret != SUCCESS) {
  1463. GELOGW("Fail to trans json to tensor desc.");
  1464. return ret;
  1465. }
  1466. var_addr_mgr_vector.emplace_back(var_addr_json[kName].get<string>(), move(var_addr_mgr));
  1467. var_offset_set.insert(logic_address);
  1468. } catch (const exception &e) {
  1469. GELOGW("Fail to trans Json to VarAddrMgr. Error message: %s", e.what());
  1470. return INTERNAL_ERROR;
  1471. }
  1472. }
  1473. return SUCCESS;
  1474. }
  1475. Status ModelCacheHelper::ParseCurVarTensorDescMapFromJson(
  1476. const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map) {
  1477. if (!(json.is_array() || json.is_null())) {
  1478. GELOGW("Input param json type should be null or array.");
  1479. return PARAM_INVALID;
  1480. }
  1481. cur_var_tensor_desc_map.clear();
  1482. for (const Json &tensor_desc_json : json) {
  1483. GeTensorDesc tensor_desc;
  1484. try {
  1485. auto ret = JsonToTensorDesc(tensor_desc_json[kTensorDesc], tensor_desc);
  1486. if (ret != SUCCESS) {
  1487. GELOGW("Fail to trans json to tensor desc.");
  1488. return ret;
  1489. }
  1490. cur_var_tensor_desc_map[tensor_desc_json[kName].get<string>()] = move(tensor_desc);
  1491. } catch (const exception &e) {
  1492. GELOGW("Fail to trans Json to VarAddrMgr. Error message: %s", e.what());
  1493. return INTERNAL_ERROR;
  1494. }
  1495. }
  1496. return SUCCESS;
  1497. }
  1498. Status ModelCacheHelper::ParseTransRoadsFromJson(
  1499. const Json &json, std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads) {
  1500. if (!(json.is_array() || json.is_null())) {
  1501. GELOGW("Input param json type should be null or array.");
  1502. return PARAM_INVALID;
  1503. }
  1504. trans_roads.clear();
  1505. try {
  1506. for (const Json &name_trans_road_json : json) {
  1507. const Json &trans_road_json = name_trans_road_json[kTransRoad];
  1508. if (!(trans_road_json.is_array() || trans_road_json.is_null())) {
  1509. GELOGW("%s json type should be null or object.", kTransRoad);
  1510. return PARAM_INVALID;
  1511. }
  1512. vector<TransNodeInfo> trans_road;
  1513. for (const Json &trans_node_json : trans_road_json) {
  1514. TransNodeInfo trans_node_info;
  1515. trans_node_info.node_type = trans_node_json[kNodeType];
  1516. GeTensorDesc input_tensor_desc;
  1517. auto ret = JsonToTensorDesc(trans_node_json[kInputTensorDesc], input_tensor_desc);
  1518. if (ret != SUCCESS) {
  1519. GELOGW("Fail to trans json to tensor desc.");
  1520. return ret;
  1521. }
  1522. trans_node_info.input = move(input_tensor_desc);
  1523. GeTensorDesc output_tensor_desc;
  1524. ret = JsonToTensorDesc(trans_node_json[kOutputTensorDesc], output_tensor_desc);
  1525. if (ret != SUCCESS) {
  1526. GELOGW("Fail to trans json to tensor desc.");
  1527. return ret;
  1528. }
  1529. trans_node_info.output = move(output_tensor_desc);
  1530. trans_road.emplace_back(move(trans_node_info));
  1531. }
  1532. trans_roads[name_trans_road_json[kName].get<string>()] = move(trans_road);
  1533. }
  1534. } catch (const exception &e) {
  1535. GELOGW("Fail to trans Json to TransRoads. Error message: %s", e.what());
  1536. return INTERNAL_ERROR;
  1537. }
  1538. return SUCCESS;
  1539. }
  1540. Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json,
  1541. std::unordered_map<std::string, uint32_t> &changed_graph_id) {
  1542. if (!(json.is_array() || json.is_null())) {
  1543. GELOGW("Input param json type should be null or array.");
  1544. return PARAM_INVALID;
  1545. }
  1546. changed_graph_id.clear();
  1547. for (const Json &name_graph_id_json : json) {
  1548. try {
  1549. changed_graph_id[name_graph_id_json[kName].get<string>()] = name_graph_id_json[kGraphId].get<uint32_t>();
  1550. } catch (const exception &e) {
  1551. GELOGW("Fail to trans Json to changed graph id. Error message: %s", e.what());
  1552. return INTERNAL_ERROR;
  1553. }
  1554. }
  1555. return SUCCESS;
  1556. }
  1557. Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json,
  1558. std::unordered_map<std::string, uint32_t> &allocated_graph_id) {
  1559. if (!(json.is_array() || json.is_null())) {
  1560. GELOGW("Input param json type should be null or array.");
  1561. return PARAM_INVALID;
  1562. }
  1563. allocated_graph_id.clear();
  1564. for (const Json &name_graph_id_json : json) {
  1565. try {
  1566. allocated_graph_id[name_graph_id_json[kName].get<string>()] = name_graph_id_json[kGraphId].get<uint32_t>();
  1567. } catch (const exception &e) {
  1568. GELOGW("Fail to trans Json to allocated graph id. Error message: %s", e.what());
  1569. return INTERNAL_ERROR;
  1570. }
  1571. }
  1572. return SUCCESS;
  1573. }
  1574. Status ModelCacheHelper::ParseBroadcastInfoFromJson(
  1575. const Json &json, std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info) {
  1576. if (!(json.is_array() || json.is_null())) {
  1577. GELOGW("Input param json type should be null or array.");
  1578. return PARAM_INVALID;
  1579. }
  1580. for (const Json &broadcast_info_json : json) {
  1581. VarBroadCastInfo broadcast_info;
  1582. try {
  1583. broadcast_info.var_name = broadcast_info_json[kName].get<string>();
  1584. broadcast_info.broadcast_name = broadcast_info_json[kBroadcastName].get<string>();
  1585. broadcast_info.idx = broadcast_info_json[kIdx].get<int>();
  1586. broadcast_info.input_offset = broadcast_info_json[kInputOffset].get<int64_t>();
  1587. broadcast_info.input_size = broadcast_info_json[kInputSize].get<uint64_t>();
  1588. broadcast_info.output_offset = broadcast_info_json[kOutputOffset].get<int64_t>();
  1589. broadcast_info.output_size = broadcast_info_json[kOutputSize].get<uint64_t>();
  1590. } catch (const exception &e) {
  1591. GELOGW("Fail to trans Json to VarBroadCastInfo. Error message: %s", e.what());
  1592. return INTERNAL_ERROR;
  1593. }
  1594. var_broadcast_info[broadcast_info.var_name] = broadcast_info;
  1595. }
  1596. return SUCCESS;
  1597. }
  1598. Status ModelCacheHelper::LoadOmModelFromCache(GeModelPtr &ge_model) const {
  1599. string cache_om = cache_path_ + to_string(graph_id_) + "_" + to_string(graph_id_run_times_[graph_id_]) + kOmSuffix;
  1600. if (!CheckInputPathValid(cache_om)) {
  1601. GELOGW("Invalid cache path for input:%s.", cache_om.c_str());
  1602. return FAILED;
  1603. }
  1604. string om_path = RealPath(cache_om.c_str());
  1605. if (om_path.empty()) {
  1606. GELOGW("file path is invalid. please check file om: %s", om_path.c_str());
  1607. return FAILED;
  1608. }
  1609. GELOGI("load model data from file: %s", om_path.c_str());
  1610. Status ret;
  1611. string key_path;
  1612. int32_t priority = 0;
  1613. ModelData model_data;
  1614. ret = DavinciModelParser::LoadFromFile(om_path.c_str(), key_path.c_str(), priority, model_data);
  1615. if (ret != SUCCESS) {
  1616. GELOGW("LoadOmModelFromCache: Load model from file failed. ret = %u", ret);
  1617. return ret;
  1618. }
  1619. ModelHelper model_helper;
  1620. ret = model_helper.LoadModel(model_data);
  1621. if (ret != SUCCESS) {
  1622. GELOGW("LoadOmModelFromCache: Load model from data failed. ret = %u", ret);
  1623. return ret;
  1624. }
  1625. ge_model = model_helper.GetGeModel();
  1626. ret = RecompileNodes(ge_model);
  1627. if (ret != SUCCESS) {
  1628. GELOGW("LoadOmModelFromCache: recompile nodes failed. ret = %u", ret);
  1629. return ret;
  1630. }
  1631. return SUCCESS;
  1632. }
  1633. Status ModelCacheHelper::GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc,
  1634. string &var_name) {
  1635. std::string::size_type underline_idx = var_key.rfind('_');
  1636. if (underline_idx == std::string::npos) {
  1637. GELOGW("Invalid var key: underline not found");
  1638. return FAILED;
  1639. }
  1640. std::string::size_type format_idx =
  1641. var_key.rfind(std::to_string(static_cast<int32_t>(tensor_desc.GetFormat())), underline_idx);
  1642. if (format_idx == std::string::npos) {
  1643. GELOGW("Invalid var key: format not found");
  1644. return FAILED;
  1645. }
  1646. var_name = var_key.substr(0, format_idx);
  1647. return SUCCESS;
  1648. }
  1649. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用，用户无需感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示：