You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

host_cpu_engine.cc 16 kB

4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
4 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "ge_local_engine/engine/host_cpu_engine.h"
  17. #include "graph/utils/op_desc_utils.h"
  18. #include "graph/utils/tensor_adapter.h"
  19. #include "graph/utils/node_utils.h"
  20. #include "graph/utils/type_utils.h"
  21. #include "register/op_kernel_registry.h"
  22. #include "register/host_cpu_context.h"
  23. #include "common/ge/ge_util.h"
  24. #include "common/ge/plugin_manager.h"
  25. #include "common/fp16_t.h"
  26. #include "common/math/math_util.h"
namespace {
// Expands to one switch-case (used by HostCpuEngine::PrepareOutputs) that
// prepares the output tensor of data type DTYPE:
//  - when need_create_flag is set, allocates a GeTensor sized for data_num
//    elements of DTYPE and stamps it with out_desc's dtype and shape;
//  - otherwise reuses the caller-provided tensor outputs[i];
// then wraps it as a Tensor and records it in named_outputs under the
// output name of index i.
// NOTE: the macro body relies on these names being in scope at the
// expansion site: need_create_flag, data_num, out_desc, outputs, i,
// op_desc, named_outputs.
#define CREATE_OUTPUT_CASE(DTYPE) \
  case (DTYPE): { \
    GeTensorPtr ge_tensor = nullptr; \
    if (need_create_flag) { \
      int64_t size = ge::GetSizeInBytes(static_cast<int64_t>(data_num), DTYPE); \
      if (size < 0) { \
        return INTERNAL_ERROR; \
      } \
      ge_tensor = MakeShared<GeTensor>(out_desc, static_cast<size_t>(size)); \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \
      ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
      ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
    } else { \
      ge_tensor = outputs[i]; \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
    } \
    auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
    auto tensor_name = op_desc->GetOutputNameByIndex(i); \
    GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "[Get][OutputName] failed. node = %s, index = %zu", \
                               op_desc->GetName().c_str(), i); \
    named_outputs.emplace(tensor_name, tensor); \
    break; \
  }
}
namespace ge {
namespace {
// Environment variable holding the root of the installed OPP package.
const char *kEnvKeyOppPath = "ASCEND_OPP_PATH";
// Location of the host-cpu kernel libraries, relative to the OPP root.
const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu";
// This library's handle is additionally remembered in constant_folding_handle_.
const std::string kConstantFoldingName = "libconstant_folding_ops.so";
}
  60. Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
  61. int64_t num_size = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize();
  62. if (out_desc.GetShape().IsUnknownShape()) {
  63. std::vector<std::pair<int64_t, int64_t>> range;
  64. if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) {
  65. REPORT_CALL_ERROR("E19999", "GetShapeRange failed.");
  66. GELOGE(INTERNAL_ERROR, "[Get][ShapeRange] failed.");
  67. return INTERNAL_ERROR;
  68. }
  69. int64_t max_range_size = 1;
  70. for (const auto& item : range) {
  71. FMK_INT64_MULCHECK(max_range_size, item.second);
  72. max_range_size *= item.second;
  73. }
  74. num_size = max_range_size;
  75. }
  76. if (num_size < 0) {
  77. REPORT_INNER_ERROR("E19999", "Get negative size, num_size=%ld.", num_size);
  78. GELOGE(INTERNAL_ERROR, "[Check][Param] Get negative size, num_size=%ld.", num_size);
  79. return INTERNAL_ERROR;
  80. }
  81. data_num = static_cast<uint64_t>(num_size);
  82. return SUCCESS;
  83. }
  84. void HostCpuEngine::CloseSo() {
  85. for (auto handle : lib_handles_) {
  86. if (mmDlclose(handle) != 0) {
  87. const char *error = mmDlerror();
  88. error = (error == nullptr) ? "" : error;
  89. GELOGW("failed to close handle, message: %s", error);
  90. }
  91. }
  92. lib_handles_.clear();
  93. }
  94. ge::Status HostCpuEngine::Initialize() {
  95. std::lock_guard<std::mutex> lock(mu_);
  96. if (initialized_) {
  97. GELOGI("HostCpuEngine is already initialized");
  98. return SUCCESS;
  99. }
  100. std::string lib_dir;
  101. GE_CHK_STATUS_RET_NOLOG(GetLibPath(lib_dir));
  102. std::vector<std::string> so_paths;
  103. if (ListSoFiles(lib_dir, so_paths) == SUCCESS) {
  104. (void) LoadLibs(so_paths);
  105. }
  106. initialized_ = true;
  107. return SUCCESS;
  108. }
// Finalize only logs; it does not release the loaded libraries.
// NOTE(review): so handles are released by CloseSo(), not here.
void HostCpuEngine::Finalize() {
  GELOGI("start HostCpuEngine::Finalize");
}
  112. bool HostCpuEngine::CheckSupported(const string &op_type) {
  113. return OpKernelRegistry::GetInstance().IsRegistered(op_type);
  114. }
  115. Status HostCpuEngine::FindOpKernel(const ge::NodePtr &node, std::unique_ptr<HostCpuOp> &op_kernel) {
  116. const std::string op_type = NodeUtils::GetNodeType(node);
  117. auto kernel = OpKernelRegistry::GetInstance().CreateHostCpuOp(op_type);
  118. if (kernel == nullptr) {
  119. GELOGD("Op of type %s is not supported by host cpu engine", op_type.c_str());
  120. return UNSUPPORTED;
  121. }
  122. GELOGD("Successfully created op kernel. op type = %s", op_type.c_str());
  123. op_kernel = std::move(kernel);
  124. return SUCCESS;
  125. }
  126. Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc,
  127. const vector<ConstGeTensorPtr> &inputs,
  128. map<std::string, const Tensor> &named_inputs) {
  129. auto num_inputs = op_desc->GetInputsSize();
  130. if (num_inputs != inputs.size()) {
  131. REPORT_INNER_ERROR("E19999", "Mismatching input sizes. op_desc:%s(%s) has %zu input(s), but given %zu",
  132. op_desc->GetName().c_str(), op_desc->GetType().c_str(), num_inputs, inputs.size());
  133. GELOGE(PARAM_INVALID, "[Check][Param] Mismatching input sizes. op_desc:%s(%s) has %zu input(s), but given %zu",
  134. op_desc->GetName().c_str(), op_desc->GetType().c_str(), num_inputs, inputs.size());
  135. return PARAM_INVALID;
  136. }
  137. for (size_t i = 0; i < num_inputs; ++i) {
  138. auto ge_tensor = inputs[i];
  139. GE_CHECK_NOTNULL(ge_tensor);
  140. auto tensor = TensorAdapter::AsTensor(*ge_tensor);
  141. auto tensor_name = op_desc->GetInputNameByIndex(i);
  142. GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "[Get][InputName] failed. node = %s, index = %zu",
  143. op_desc->GetName().c_str(), i);
  144. GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s",
  145. op_desc->GetName().c_str(), i, tensor_name.c_str());
  146. named_inputs.emplace(tensor_name, tensor);
  147. }
  148. return SUCCESS;
  149. }
// Prepares one Tensor per declared output of op_desc in named_outputs.
// If the caller passed a non-empty outputs vector of the right size, those
// tensors are reused; otherwise fresh tensors are allocated per output
// (see CREATE_OUTPUT_CASE). Returns NOT_CHANGED for unsupported dtypes.
Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc,
                                     vector<GeTensorPtr> &outputs,
                                     map<std::string, Tensor> &named_outputs) {
  // A mismatching pre-allocated output list is discarded; fresh tensors
  // will be allocated below instead.
  if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
    GELOGW("size of outputs not match, size of outputs = %zu, exactly output_num=%zu.",
           outputs.size(), op_desc->GetOutputsSize());
    outputs.clear();
  }
  bool need_create_flag = (outputs.size() != op_desc->GetOutputsSize());
  for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
    const auto &out_desc = op_desc->GetOutputDesc(i);
    uint64_t data_num = 0;
    // Element count is only needed when a tensor must be allocated.
    if (need_create_flag) {
      if (GetDataNumber(out_desc, data_num) != SUCCESS) {
        GELOGE(INTERNAL_ERROR, "[Get][Number] node:%s get size for output %zu failed", op_desc->GetName().c_str(), i);
        return INTERNAL_ERROR;
      }
    }
    // Each case macro expands against the locals declared above:
    // need_create_flag, data_num, out_desc, outputs, i, op_desc, named_outputs.
    switch (out_desc.GetDataType()) {
      CREATE_OUTPUT_CASE(DT_BOOL)
      CREATE_OUTPUT_CASE(DT_INT8)
      CREATE_OUTPUT_CASE(DT_INT16)
      CREATE_OUTPUT_CASE(DT_INT32)
      CREATE_OUTPUT_CASE(DT_INT64)
      CREATE_OUTPUT_CASE(DT_UINT8)
      CREATE_OUTPUT_CASE(DT_UINT16)
      CREATE_OUTPUT_CASE(DT_UINT32)
      CREATE_OUTPUT_CASE(DT_UINT64)
      CREATE_OUTPUT_CASE(DT_FLOAT16)
      CREATE_OUTPUT_CASE(DT_FLOAT)
      CREATE_OUTPUT_CASE(DT_DOUBLE)
      CREATE_OUTPUT_CASE(DT_INT4)
      default:
        GELOGW("data type %s not support.",
               TypeUtils::DataTypeToSerialString(out_desc.GetDataType()).c_str());
        return NOT_CHANGED;
    }
  }
  return SUCCESS;
}
  190. Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc,
  191. HostCpuOp &op_kernel,
  192. map<std::string, const Tensor> &named_inputs,
  193. map<std::string, Tensor> &named_outputs) {
  194. GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str());
  195. Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc);
  196. auto ret = op_kernel.Compute(op, named_inputs, named_outputs);
  197. if (ret != GRAPH_SUCCESS) {
  198. GELOGW("Failed to compute host cpu op. node = %s", op_desc->GetName().c_str());
  199. return FAILED;
  200. }
  201. op.BreakConnect();
  202. return SUCCESS;
  203. }
  204. Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs) {
  205. GE_CHECK_NOTNULL(node);
  206. GE_CHECK_NOTNULL(node->GetOpDesc());
  207. GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str());
  208. std::unique_ptr<HostCpuOp> op_kernel;
  209. GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel));
  210. std::map<std::string, const Tensor> named_inputs;
  211. std::map<std::string, Tensor> named_outputs;
  212. auto op_desc = node->GetOpDesc();
  213. GE_CHK_STATUS_RET_NOLOG(PrepareInputs(op_desc, inputs, named_inputs));
  214. GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, outputs, named_outputs));
  215. GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs));
  216. std::vector<GeTensorPtr> tmp_outputs;
  217. for (size_t i = 0; i < op_desc->GetOutputsSize(); i++) {
  218. auto tensor_name = op_desc->GetOutputNameByIndex(i);
  219. if (tensor_name.empty()) {
  220. REPORT_INNER_ERROR("E19999", "GetOutputNameByIndex failed, node = %s, index = %zu",
  221. op_desc->GetName().c_str(), i);
  222. GELOGE(INTERNAL_ERROR, "[Get][OutputName] failed. node = %s, index = %zu", op_desc->GetName().c_str(), i);
  223. return INTERNAL_ERROR;
  224. }
  225. auto iter = named_outputs.find(tensor_name);
  226. if (iter == named_outputs.end()) {
  227. REPORT_INNER_ERROR("E19999", "get output tensor failed, node = %s, index = %zu, tensor_name = %s",
  228. op_desc->GetName().c_str(), i, tensor_name.c_str());
  229. GELOGE(INTERNAL_ERROR, "[Get][OutputTensor] failed. node = %s, index = %zu, tensor_name = %s",
  230. op_desc->GetName().c_str(), i, tensor_name.c_str());
  231. return INTERNAL_ERROR;
  232. }
  233. auto ge_tensor = MakeShared<GeTensor>(TensorAdapter::AsGeTensor(iter->second));
  234. GE_CHECK_NOTNULL(ge_tensor);
  235. tmp_outputs.emplace_back(ge_tensor);
  236. }
  237. GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str());
  238. outputs.swap(tmp_outputs);
  239. return SUCCESS;
  240. }
  241. ge::Status HostCpuEngine::GetLibPath(std::string &lib_path) {
  242. GELOGI("Start to get host cpu lib path");
  243. const char *path_env = std::getenv(kEnvKeyOppPath);
  244. if (path_env != nullptr) {
  245. lib_path = path_env;
  246. if (!lib_path.empty()) {
  247. lib_path += kHostCpuLibRelativePath;
  248. GELOGI("Get host cpu so path from env: %s", lib_path.c_str());
  249. return SUCCESS;
  250. }
  251. }
  252. lib_path = PluginManager::GetPath();
  253. GELOGI("path_base is %s", lib_path.c_str());
  254. lib_path = lib_path.substr(0, lib_path.rfind('/'));
  255. lib_path = lib_path.substr(0, lib_path.rfind('/'));
  256. lib_path += "/opp";
  257. lib_path += kHostCpuLibRelativePath;
  258. GELOGI("Get host cpu so path from PluginManager::GetPath: %s", lib_path.c_str());
  259. return SUCCESS;
  260. }
  261. static int RegularFileFilterFn(const mmDirent *entry) {
  262. return entry->d_type == DT_REG;
  263. }
  264. Status HostCpuEngine::ListSoFiles(const std::string &base_dir, std::vector<std::string> &names) {
  265. std::string real_path = base_dir;
  266. GE_CHK_STATUS_RET_NOLOG(GetRealPath(real_path));
  267. real_path.push_back('/');
  268. mmDirent **entries = nullptr;
  269. auto ret = mmScandir(real_path.c_str(), &entries, RegularFileFilterFn, nullptr);
  270. if (ret < 0) {
  271. GELOGW("scan dir failed. path = %s, ret = %d, errmsg = %s", real_path.c_str(), ret, strerror(errno));
  272. return INTERNAL_ERROR;
  273. }
  274. for (int i = 0; i < ret; ++i) {
  275. mmDirent *dir_ent = entries[i];
  276. string name = string(dir_ent->d_name);
  277. if (IsSoFile(name)) {
  278. names.emplace_back(real_path + name);
  279. }
  280. }
  281. mmScandirFree(entries, ret);
  282. GELOGI("Found %d libs to load", ret);
  283. return SUCCESS;
  284. }
  285. bool HostCpuEngine::IsSoFile(const std::string &file_name) {
  286. static const std::string so_suffix(".so");
  287. auto pos = file_name.rfind(so_suffix);
  288. if (pos == string::npos) {
  289. return false;
  290. }
  291. return pos == file_name.size() - so_suffix.size();
  292. }
  293. Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {
  294. for (auto &so_path : lib_paths) {
  295. GE_CHK_STATUS_RET_NOLOG(GetRealPath(so_path));
  296. GE_CHK_STATUS_RET_NOLOG(LoadLib(so_path));
  297. }
  298. return SUCCESS;
  299. }
  300. Status HostCpuEngine::LoadLib(const std::string &lib_path) {
  301. GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
  302. auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
  303. if (handle == nullptr) {
  304. const char *error = mmDlerror();
  305. error = (error == nullptr) ? "" : error;
  306. REPORT_CALL_ERROR("E19999", "mmDlopen failed, path = %s, error = %s", lib_path.c_str(), error);
  307. GELOGE(INTERNAL_ERROR, "[Invoke][DlOpen] failed. path = %s, error = %s", lib_path.c_str(), error);
  308. return INTERNAL_ERROR;
  309. }
  310. auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize");
  311. if (initialize != nullptr) {
  312. GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
  313. if (initialize(HostCpuContext()) != SUCCESS) {
  314. GELOGW("Failed to invoke function Initialize in lib: %s", lib_path.c_str());
  315. }
  316. }
  317. GELOGI("Lib: %s has been opened", lib_path.c_str());
  318. if (lib_path.find(kConstantFoldingName) != lib_path.npos) {
  319. constant_folding_handle_ = handle;
  320. }
  321. lib_handles_.emplace_back(handle);
  322. return SUCCESS;
  323. }
  324. Status HostCpuEngine::GetRealPath(std::string &path) {
  325. std::string real_path = RealPath(path.c_str());
  326. if (real_path.empty()) {
  327. GELOGW("File path %s is invalid.", path.c_str());
  328. return INTERNAL_ERROR;
  329. }
  330. path = real_path;
  331. return SUCCESS;
  332. }
  333. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示。