You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

dump_manager.cc 6.1 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "common/dump/dump_manager.h"
  17. #include "framework/common/debug/ge_log.h"
  18. #include "framework/common/debug/log.h"
  namespace {
  // Textual switch values compared against DumpConfig::dump_status, dump_debug and dump_op_switch.
  constexpr const char *kDumpOFF = "OFF";
  constexpr const char *kDumpoff = "off";
  constexpr const char *kDumpOn = "on";
  // Key under which this file stores the configured properties in dump_properties_map_.
  constexpr uint64_t kInferSessionId = 0;
  // Value handed to DumpProperties::SetOpDebugMode when dump debug is switched on.
  constexpr uint32_t kAllOverflow = 3;
  }  // namespace
  26. namespace ge {
  27. DumpManager &DumpManager::GetInstance() {
  28. static DumpManager instance;
  29. return instance;
  30. }
  31. bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  32. if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) {
  33. dump_properties_map_[kInferSessionId] = dump_properties;
  34. GELOGI("Dump does not open");
  35. return false;
  36. }
  37. GELOGI("Dump status is %s, dump debug is %s.", dump_config.dump_status.c_str(), dump_config.dump_debug.c_str());
  38. if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) &&
  39. dump_config.dump_debug == kDumpoff) {
  40. dump_properties.ClearDumpPropertyValue();
  41. dump_properties_map_[kInferSessionId] = dump_properties;
  42. return false;
  43. }
  44. if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) {
  45. GELOGW("Not support coexistence of dump debug and dump status.");
  46. return false;
  47. }
  48. return true;
  49. }
  50. void DumpManager::SetDumpDebugConf(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  51. if (dump_config.dump_debug == kDumpOn) {
  52. GELOGI("Only do overflow detection, dump debug is %s.", dump_config.dump_debug.c_str());
  53. dump_properties.InitInferOpDebug();
  54. dump_properties.SetOpDebugMode(kAllOverflow);
  55. }
  56. }
  57. void DumpManager::SetDumpList(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  58. for (const auto &model_dump : dump_config.dump_list) {
  59. std::string model_name = model_dump.model_name;
  60. GELOGI("Dump model is %s", model_name.c_str());
  61. std::set<std::string> dump_layers;
  62. for (const auto &layer : model_dump.layers) {
  63. GELOGI("Dump layer is %s in model", layer.c_str());
  64. dump_layers.insert(layer);
  65. }
  66. dump_properties.AddPropertyValue(model_name, dump_layers);
  67. }
  68. }
  69. Status DumpManager::SetNormalDumpConf(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  70. if (dump_config.dump_status == kDumpOn) {
  71. GELOGI("Only do normal dump process, dump status is %s", dump_config.dump_status.c_str());
  72. dump_properties.SetDumpStatus(dump_config.dump_status);
  73. std::string dump_op_switch = dump_config.dump_op_switch;
  74. dump_properties.SetDumpOpSwitch(dump_op_switch);
  75. if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) {
  76. dump_properties_map_.emplace(kInferSessionId, dump_properties);
  77. GELOGE(PARAM_INVALID, "[Check][DumpList]Invalid, dump_op_switch is %s", dump_op_switch.c_str());
  78. REPORT_INNER_ERROR("E19999", "Dump list check invalid, dump_op_switch is %s", dump_op_switch.c_str());
  79. return PARAM_INVALID;
  80. }
  81. if (!dump_config.dump_list.empty()) {
  82. if (dump_op_switch == kDumpOn) {
  83. GELOGI("Start to dump model and single op, dump op switch is %s", dump_op_switch.c_str());
  84. } else {
  85. GELOGI("Only dump model, dump op switch is %s", dump_op_switch.c_str());
  86. }
  87. SetDumpList(dump_config, dump_properties);
  88. } else {
  89. GELOGI("Only dump single op, dump op switch is %s", dump_op_switch.c_str());
  90. }
  91. GELOGI("Dump mode is %s", dump_config.dump_mode.c_str());
  92. dump_properties.SetDumpMode(dump_config.dump_mode);
  93. }
  94. return SUCCESS;
  95. }
  96. Status DumpManager::SetDumpPath(const DumpConfig &dump_config, DumpProperties &dump_properties) {
  97. std::string dump_path = dump_config.dump_path;
  98. if (dump_path.empty()) {
  99. GELOGE(PARAM_INVALID, "[Check][DumpPath]It is empty.");
  100. REPORT_INNER_ERROR("E19999", "Dump path check is empty.");
  101. return PARAM_INVALID;
  102. }
  103. if (dump_path[dump_path.size() - 1] != '/') {
  104. dump_path = dump_path + "/";
  105. }
  106. dump_path = dump_path + CurrentTimeInStr() + "/";
  107. GELOGI("Dump path is %s", dump_path.c_str());
  108. dump_properties.SetDumpPath(dump_path);
  109. return SUCCESS;
  110. }
  111. Status DumpManager::SetDumpConf(const DumpConfig &dump_config) {
  112. DumpProperties dump_properties;
  113. if (!NeedDoDump(dump_config, dump_properties)) {
  114. GELOGD("No need do dump process.");
  115. return SUCCESS;
  116. }
  117. SetDumpDebugConf(dump_config, dump_properties);
  118. GE_CHK_STATUS_RET(SetNormalDumpConf(dump_config, dump_properties), "[Init][DumpConf] failed when dump status is on.");
  119. GE_CHK_STATUS_RET(SetDumpPath(dump_config, dump_properties), "[Init][DumpPath] failed.");
  120. dump_properties_map_[kInferSessionId] = dump_properties;
  121. return SUCCESS;
  122. }
  123. const DumpProperties &DumpManager::GetDumpProperties(uint64_t session_id) {
  124. std::lock_guard<std::mutex> lock(mutex_);
  125. auto iter = dump_properties_map_.find(session_id);
  126. if (iter != dump_properties_map_.end()) {
  127. return iter->second;
  128. }
  129. static DumpProperties default_properties;
  130. return default_properties;
  131. }
  132. void DumpManager::AddDumpProperties(uint64_t session_id, const DumpProperties &dump_properties) {
  133. std::lock_guard<std::mutex> lock(mutex_);
  134. dump_properties_map_.emplace(session_id, dump_properties);
  135. }
  136. void DumpManager::RemoveDumpProperties(uint64_t session_id) {
  137. std::lock_guard<std::mutex> lock(mutex_);
  138. auto iter = dump_properties_map_.find(session_id);
  139. if (iter != dump_properties_map_.end()) {
  140. dump_properties_map_.erase(iter);
  141. }
  142. }
  143. } // namespace ge

图引擎模块（GE）是MindSpore的一个子模块，其代码由C++实现，位于前端模块ME和底层硬件之间，起到承接作用。图引擎模块以ME下发的图作为输入，然后进行一系列的深度图优化操作，最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点，做了特定的优化工作，以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时，GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成，详细的架构图如下所示。