You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

dump_properties.cc 18 kB

4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/dump/dump_properties.h"
  17. #include <cstdio>
  18. #include <string>
  19. #include <regex>
  20. #include "common/ge/ge_util.h"
  21. #include "framework/common/util.h"
  22. #include "framework/common/debug/ge_log.h"
  23. #include "framework/common/debug/log.h"
  24. #include "framework/common/ge_types.h"
  25. #include "framework/common/types.h"
  26. #include "graph/debug/ge_attr_define.h"
  27. #include "graph/ge_context.h"
  28. #include "graph/utils/attr_utils.h"
  namespace {
  // Option value that means "enabled" for the enableDump / enableDumpDebug switches.
  const std::string kEnableFlag = "1";
  // Value of the dump status / single-op dump switch that means "open".
  const std::string kDumpStatusOpen = "on";
  // Bit flags stored in op_debug_mode_: bit 0 = aicore overflow, bit 1 = atomic overflow.
  const uint32_t kAicoreOverflow = 1U << 0U;
  const uint32_t kAtomicOverflow = 1U << 1U;
  // Both overflow kinds at once.
  const uint32_t kAllOverflow = kAicoreOverflow | kAtomicOverflow;
  }  // namespace
  36. namespace ge {
  37. void DumpProperties::Split(const std::string &s, std::vector<std::string> &result, const char *delchar) {
  38. if (s.empty()) {
  39. return;
  40. }
  41. result.clear();
  42. char *buffer = new (std::nothrow)char[s.size() + 1];
  43. if (buffer == nullptr) {
  44. GELOGE(FAILED, "[Split][string] failed while malloc memory, string value is:%s", s.c_str());
  45. REPORT_CALL_ERROR("E19999", "Memory malloc may fail when split string, get fatal exception, "
  46. "string value is:%s", s.c_str());
  47. return;
  48. }
  49. buffer[s.size()] = '\0';
  50. errno_t e = strcpy_s(buffer, s.size() + 1, s.c_str());
  51. if (e != EOK) {
  52. delete[] buffer;
  53. return;
  54. }
  55. char *context = nullptr;
  56. char *p = strtok_s(buffer, delchar, &context);
  57. while (p != nullptr) {
  58. result.emplace_back(p);
  59. p = strtok_s(nullptr, delchar, &context);
  60. }
  61. delete[] buffer;
  62. }
  63. Status DumpProperties::CheckDumpStep(const std::string &dump_step) {
  64. std::string modified_dum_step = dump_step + "|";
  65. std::smatch result;
  66. std::vector<string> match_vecs;
  67. std::regex pattern(R"((\d{1,}-\d{1,}\||\d{1,}\|)+)");
  68. if (regex_match(modified_dum_step, result, pattern)) {
  69. Split(result.str(), match_vecs, "|");
  70. if (match_vecs.empty()) {
  71. REPORT_CALL_ERROR("E19999", "Split may get fatal exception, dump_step:%s.", dump_step.c_str());
  72. GELOGE(FAILED, "[Check][Param] failed. Split may get fatal exception, ge.exec.dumpStep:%s.", dump_step.c_str());
  73. return FAILED;
  74. }
  75. // 100 is the max sets of dump steps.
  76. if (match_vecs.size() > 100) {
  77. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  78. std::vector<std::string>({
  79. "ge.exec.dumpStep",
  80. dump_step.c_str(),
  81. " is not supported, only support dump <= 100 sets of data"}));
  82. GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
  83. "dump_step only support dump <= 100 sets of data.", dump_step.c_str());
  84. return PARAM_INVALID;
  85. }
  86. for (const auto &match_vec : match_vecs) {
  87. std::vector<string> vec_after_split;
  88. Split(match_vec, vec_after_split, "-");
  89. if (match_vecs.empty()) {
  90. REPORT_CALL_ERROR("E19999", "Split may get fatal exception.");
  91. GELOGE(FAILED, "[Check][Param] failed, split may get fatal exception.");
  92. return FAILED;
  93. }
  94. if (vec_after_split.size() > 1) {
  95. if (std::atoi(vec_after_split[0].c_str()) >= std::atoi(vec_after_split[1].c_str())) {
  96. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  97. std::vector<std::string>({
  98. "ge.exec.dumpStep",
  99. dump_step.c_str(),
  100. " is not supported."
  101. "in range steps, the first step is >= second step, correct example:'0|5|10-20"}));
  102. GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
  103. "in range steps, the first step is >= second step, correct example:'0|5|10-20'", dump_step.c_str());
  104. return PARAM_INVALID;
  105. }
  106. }
  107. }
  108. } else {
  109. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  110. std::vector<std::string>({
  111. "ge.exec.dumpStep",
  112. dump_step.c_str(),
  113. " is not supported, correct example:'0|5|10|50-100."}));
  114. GELOGE(PARAM_INVALID, "[Check][Param] get dump_step value:%s, "
  115. "dump_step string style is error, correct example:'0|5|10|50-100.'", dump_step.c_str());
  116. return PARAM_INVALID;
  117. }
  118. return SUCCESS;
  119. }
  120. Status DumpProperties::CheckDumpMode(const std::string &dump_mode) {
  121. const std::set<string> dump_mode_list = {"input", "output", "all"};
  122. std::set<string>::iterator iter;
  123. if ((iter = dump_mode_list.find(dump_mode)) == dump_mode_list.end()) {
  124. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  125. std::vector<std::string>({
  126. "ge.exec.dumpMode",
  127. dump_mode.c_str(),
  128. " is not supported, should be one of the following:[input, output, all]"}));
  129. GELOGE(PARAM_INVALID, "[Check][Param] the dump_debug_mode:%s, is is not supported,"
  130. "should be one of the following:[input, output, all].", dump_mode.c_str());
  131. return PARAM_INVALID;
  132. }
  133. return SUCCESS;
  134. }
  135. Status DumpProperties::CheckDumpPath(const std::string &input) {
  136. if (mmIsDir(input.c_str()) != EN_OK) {
  137. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  138. std::vector<std::string>({
  139. "ge.exec.dumpPath",
  140. input.c_str(),
  141. " is not a directory."}));
  142. GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, is not directory.", input.c_str());
  143. return PARAM_INVALID;
  144. }
  145. char trusted_path[MMPA_MAX_PATH] = { "\0" };
  146. if (mmRealPath(input.c_str(), trusted_path, MMPA_MAX_PATH) != EN_OK) {
  147. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  148. std::vector<std::string>({
  149. "ge.exec.dumpPath",
  150. input.c_str(),
  151. " dumpPath invalid."}));
  152. GELOGE(PARAM_INVALID, "[Check][Param] the dumpPath:%s, is invalid.", input.c_str());
  153. return PARAM_INVALID;
  154. }
  155. if (mmAccess2(trusted_path, M_R_OK | M_W_OK) != EN_OK) {
  156. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  157. std::vector<std::string>({
  158. "ge.exec.dumpPath",
  159. input.c_str(),
  160. " does't have read, write permissions."}));
  161. GELOGE(PARAM_INVALID, "[Check][Param] the path:%s, does't have read, write permissions.", input.c_str());
  162. return PARAM_INVALID;
  163. }
  164. return SUCCESS;
  165. }
  166. Status DumpProperties::CheckEnableDump(const std::string &input) {
  167. std::set<string> enable_dump_option_list = {"1", "0"};
  168. auto it = enable_dump_option_list.find(input);
  169. if (it == enable_dump_option_list.end()) {
  170. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  171. std::vector<std::string>({
  172. "ge.exec.enableDump",
  173. input.c_str(),
  174. " only support 1 or 0."}));
  175. GELOGE(PARAM_INVALID, "[Check][Param] Not support ge.exec.enableDump or ge.exec.enableDumpDebug format:%s, "
  176. "only support 1 or 0.", input.c_str());
  177. return PARAM_INVALID;
  178. }
  179. return SUCCESS;
  180. }
  181. DumpProperties::DumpProperties(const DumpProperties &other) {
  182. CopyFrom(other);
  183. }
  184. DumpProperties &DumpProperties::operator=(const DumpProperties &other) {
  185. CopyFrom(other);
  186. return *this;
  187. }
  188. Status DumpProperties::SetDumpOptions() {
  189. if (enable_dump_ == kEnableFlag) {
  190. std::string dump_step;
  191. if (GetContext().GetOption(OPTION_EXEC_DUMP_STEP, dump_step) == GRAPH_SUCCESS && !dump_step.empty()) {
  192. GE_CHK_STATUS_RET(CheckDumpStep(dump_step), "[Check][dump_step] failed.");
  193. GELOGI("Get dump step %s successfully", dump_step.c_str());
  194. SetDumpStep(dump_step);
  195. }
  196. string dump_mode = "output";
  197. if (GetContext().GetOption(OPTION_EXEC_DUMP_MODE, dump_mode) == GRAPH_SUCCESS) {
  198. GELOGI("Get dump mode %s successfully", dump_mode.c_str());
  199. GE_CHK_STATUS_RET(CheckDumpMode(dump_mode), "[Check][dump_mode] failed.");
  200. SetDumpMode(dump_mode);
  201. }
  202. AddPropertyValue(DUMP_ALL_MODEL, {});
  203. }
  204. return SUCCESS;
  205. }
  206. Status DumpProperties::InitByOptions() {
  207. enable_dump_.clear();
  208. enable_dump_debug_.clear();
  209. dump_path_.clear();
  210. dump_step_.clear();
  211. dump_mode_.clear();
  212. is_train_op_debug_ = false;
  213. is_infer_op_debug_ = false;
  214. op_debug_mode_ = 0;
  215. std::string enable_dump = std::to_string(false);
  216. (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP, enable_dump);
  217. enable_dump_ = enable_dump;
  218. if (!enable_dump_.empty()) {
  219. GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_), "[Check][enable_dump] failed.");
  220. }
  221. std::string enable_dump_debug = std::to_string(false);
  222. (void)GetContext().GetOption(OPTION_EXEC_ENABLE_DUMP_DEBUG, enable_dump_debug);
  223. enable_dump_debug_ = enable_dump_debug;
  224. if (!enable_dump_debug_.empty()) {
  225. GE_CHK_STATUS_RET(CheckEnableDump(enable_dump_debug_), "[Check][enable_dump_debug] failed.");
  226. }
  227. if ((enable_dump_ == kEnableFlag) && (enable_dump_debug_ == kEnableFlag)) {
  228. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  229. std::vector<std::string>({
  230. "ge.exec.enableDump and ge.exec.enableDumpDebug",
  231. enable_dump_ + ", " + enable_dump_debug,
  232. "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be set to 1 at the same time."}));
  233. GELOGE(FAILED, "ge.exec.enableDump and ge.exec.enableDumpDebug cannot be both set to 1 at the same time.");
  234. return FAILED;
  235. }
  236. if ((enable_dump_ == kEnableFlag) || (enable_dump_debug_ == kEnableFlag)) {
  237. std::string dump_path;
  238. if (GetContext().GetOption(OPTION_EXEC_DUMP_PATH, dump_path) == GRAPH_SUCCESS) {
  239. GE_CHK_STATUS_RET(CheckDumpPath(dump_path), "Check dump path failed.");
  240. if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') {
  241. dump_path = dump_path + "/";
  242. }
  243. dump_path = dump_path + CurrentTimeInStr() + "/";
  244. GELOGI("Get dump path %s successfully", dump_path.c_str());
  245. SetDumpPath(dump_path);
  246. } else {
  247. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  248. std::vector<std::string>({
  249. "ge.exec.dumpPath",
  250. dump_path,
  251. "ge.exec.dumpPath is not set."}));
  252. GELOGE(FAILED, "[Check][dump_path] failed. Dump path is not set.");
  253. return FAILED;
  254. }
  255. }
  256. GE_CHK_STATUS_RET(SetDumpOptions(), "SetDumpOptions failed.");
  257. GE_CHK_STATUS_RET(SetDumpDebugOptions(), "SetDumpDebugOptions failed.");
  258. return SUCCESS;
  259. }
  260. // The following is the new dump scenario of the fusion operator
  261. void DumpProperties::AddPropertyValue(const std::string &model, const std::set<std::string> &layers) {
  262. for (const std::string &layer : layers) {
  263. GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
  264. }
  265. model_dump_properties_map_[model] = layers;
  266. }
  267. void DumpProperties::DeletePropertyValue(const std::string &model) {
  268. auto iter = model_dump_properties_map_.find(model);
  269. if (iter != model_dump_properties_map_.end()) {
  270. model_dump_properties_map_.erase(iter);
  271. }
  272. }
  273. void DumpProperties::ClearDumpPropertyValue() {
  274. model_dump_properties_map_.clear();
  275. }
  276. void DumpProperties::ClearDumpInfo() {
  277. enable_dump_.clear();
  278. enable_dump_debug_.clear();
  279. dump_path_.clear();
  280. dump_step_.clear();
  281. dump_mode_.clear();
  282. dump_op_switch_.clear();
  283. dump_status_.clear();
  284. is_train_op_debug_ = false;
  285. is_infer_op_debug_ = false;
  286. op_debug_mode_ = 0;
  287. }
  288. std::set<std::string> DumpProperties::GetAllDumpModel() const {
  289. std::set<std::string> model_list;
  290. for (auto &iter : model_dump_properties_map_) {
  291. model_list.insert(iter.first);
  292. }
  293. return model_list;
  294. }
  295. std::set<std::string> DumpProperties::GetPropertyValue(const std::string &model) const {
  296. auto iter = model_dump_properties_map_.find(model);
  297. if (iter != model_dump_properties_map_.end()) {
  298. return iter->second;
  299. }
  300. return {};
  301. }
  302. bool DumpProperties::IsLayerNeedDump(const std::string &model, const std::string &om_name,
  303. const std::string &op_name) const {
  304. // if dump all
  305. GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
  306. if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
  307. return true;
  308. }
  309. // if this model need dump
  310. auto om_name_iter = model_dump_properties_map_.find(om_name);
  311. auto model_name_iter = model_dump_properties_map_.find(model);
  312. if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) {
  313. // if no dump layer info, dump all layer in this model
  314. auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter;
  315. if (model_iter->second.empty()) {
  316. return true;
  317. }
  318. return model_iter->second.find(op_name) != model_iter->second.end();
  319. }
  320. GELOGD("Model %s is not seated to be dump", model.c_str());
  321. return false;
  322. }
  323. void DumpProperties::SetDumpPath(const std::string &path) {
  324. dump_path_ = path;
  325. }
  326. const std::string &DumpProperties::GetDumpPath() const {
  327. return dump_path_;
  328. }
  329. void DumpProperties::SetDumpStep(const std::string &step) {
  330. dump_step_ = step;
  331. }
  332. const std::string &DumpProperties::GetDumpStep() const {
  333. return dump_step_;
  334. }
  335. void DumpProperties::SetDumpMode(const std::string &mode) {
  336. dump_mode_ = mode;
  337. }
  338. const std::string &DumpProperties::GetDumpMode() const {
  339. return dump_mode_;
  340. }
  341. void DumpProperties::SetDumpStatus(const std::string &status) {
  342. dump_status_ = status;
  343. }
  344. const std::string &DumpProperties::GetDumpStatus() const {
  345. return dump_status_;
  346. }
  347. void DumpProperties::InitInferOpDebug() {
  348. is_infer_op_debug_ = true;
  349. }
  350. void DumpProperties::SetOpDebugMode(const uint32_t &op_debug_mode) {
  351. op_debug_mode_ = op_debug_mode;
  352. }
  353. void DumpProperties::SetDumpOpSwitch(const std::string &dump_op_switch) {
  354. dump_op_switch_ = dump_op_switch;
  355. }
  356. const std::string &DumpProperties::GetDumpOpSwitch() const {
  357. return dump_op_switch_;
  358. }
  359. bool DumpProperties::IsSingleOpNeedDump() const {
  360. if (dump_op_switch_ == kDumpStatusOpen) {
  361. return true;
  362. }
  363. return false;
  364. }
  365. bool DumpProperties::IsDumpOpen() const {
  366. if (enable_dump_ == kEnableFlag || dump_status_ == kDumpStatusOpen) {
  367. return true;
  368. }
  369. return false;
  370. }
  371. void DumpProperties::CopyFrom(const DumpProperties &other) {
  372. if (&other != this) {
  373. enable_dump_ = other.enable_dump_;
  374. enable_dump_debug_ = other.enable_dump_debug_;
  375. dump_path_ = other.dump_path_;
  376. dump_step_ = other.dump_step_;
  377. dump_mode_ = other.dump_mode_;
  378. dump_status_ = other.dump_status_;
  379. dump_op_switch_ = other.dump_op_switch_;
  380. model_dump_properties_map_ = other.model_dump_properties_map_;
  381. is_train_op_debug_ = other.is_train_op_debug_;
  382. is_infer_op_debug_ = other.is_infer_op_debug_;
  383. op_debug_mode_ = other.op_debug_mode_;
  384. }
  385. }
  386. Status DumpProperties::SetDumpDebugOptions() {
  387. if (enable_dump_debug_ == kEnableFlag) {
  388. std::string dump_debug_mode;
  389. if (GetContext().GetOption(OPTION_EXEC_DUMP_DEBUG_MODE, dump_debug_mode) == GRAPH_SUCCESS) {
  390. GELOGD("Get ge.exec.dumpDebugMode %s successfully.", dump_debug_mode.c_str());
  391. } else {
  392. GELOGW("ge.exec.dumpDebugMode is not set.");
  393. return SUCCESS;
  394. }
  395. if (dump_debug_mode == OP_DEBUG_AICORE) {
  396. GELOGD("ge.exec.dumpDebugMode=aicore_overflow, op debug is open.");
  397. is_train_op_debug_ = true;
  398. op_debug_mode_ = kAicoreOverflow;
  399. } else if (dump_debug_mode == OP_DEBUG_ATOMIC) {
  400. GELOGD("ge.exec.dumpDebugMode=atomic_overflow, op debug is open.");
  401. is_train_op_debug_ = true;
  402. op_debug_mode_ = kAtomicOverflow;
  403. } else if (dump_debug_mode == OP_DEBUG_ALL) {
  404. GELOGD("ge.exec.dumpDebugMode=all, op debug is open.");
  405. is_train_op_debug_ = true;
  406. op_debug_mode_ = kAllOverflow;
  407. } else {
  408. REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}),
  409. std::vector<std::string>({
  410. "ge.exec.dumpDebugMode",
  411. dump_debug_mode,
  412. "ge.exec.dumpDebugMode is invalid."}));
  413. GELOGE(PARAM_INVALID, "[Set][DumpDebugOptions] failed, ge.exec.dumpDebugMode is invalid.");
  414. return PARAM_INVALID;
  415. }
  416. } else {
  417. GELOGI("ge.exec.enableDumpDebug is false or is not set");
  418. }
  419. return SUCCESS;
  420. }
  421. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。