You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number; they can include dashes ('-') and can be up to 35 characters long.

host_cpu_engine.cc 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "host_cpu_engine.h"
  17. #include <dlfcn.h>
  18. #include "graph/common/omg_util.h"
  19. #include "graph/utils/op_desc_utils.h"
  20. #include "graph/utils/tensor_adapter.h"
  21. #include "register/op_kernel_registry.h"
  22. #include "register/host_cpu_context.h"
  23. #include "common/ge/ge_util.h"
  24. #include "common/ge/plugin_manager.h"
  25. #include "graph/utils/type_utils.h"
  26. #include "common/fp16_t.h"
  27. #include "common/math/math_util.h"
namespace {
// CREATE_OUTPUT_CASE expands to one `case` label of the data-type switch in
// HostCpuEngine::PrepareOutputs. It deliberately relies on names that exist at
// the expansion site: op_desc, out_desc, i, outputs, named_outputs,
// need_create_flag.
//
// Behavior per case:
//  - need_create_flag == true: compute the element count from the output shape
//    (scalar -> 1; unknown shape -> product of the upper bounds of the shape
//    range, overflow-checked via FMK_INT64_MULCHECK), allocate a
//    zero-initialized TYPE buffer of that size, wrap it into a new GeTensor,
//    and append it to `outputs`.
//  - need_create_flag == false: reuse the caller-provided tensor outputs[i].
// In both paths the tensor is converted via TensorAdapter::AsTensor and
// inserted into `named_outputs` keyed by the output name. Any failure returns
// from the ENCLOSING function with an error status.
//
// NOTE(review): buf is a local unique_ptr freed when the case block ends —
// assumes GeTensor::SetData copies the bytes; confirm against GeTensor docs.
// NOTE(review): byte counts (data_num * sizeof(TYPE), unsigned) are logged
// with %lld — consider %zu; confirm the project's GELOG format conventions.
#define CREATE_OUTPUT_CASE(DTYPE, TYPE) \
  case (DTYPE): { \
    GeTensorPtr ge_tensor = nullptr; \
    if (need_create_flag) { \
      int64_t num_size = out_desc.GetShape().IsScalar() ? 1 : out_desc.GetShape().GetShapeSize(); \
      if (out_desc.GetShape().IsUnknownShape()) { \
        std::vector<std::pair<int64_t, int64_t>> range; \
        if (out_desc.GetShapeRange(range) != GRAPH_SUCCESS) { \
          GELOGE(INTERNAL_ERROR, "Get shape range failed, node:%s", op_desc->GetName().c_str()); \
          return INTERNAL_ERROR; \
        } \
        int64_t max_range_size = 1; \
        for (const auto &item : range) { \
          FMK_INT64_MULCHECK(max_range_size, item.second); \
          max_range_size *= item.second; \
        } \
        num_size = max_range_size; \
      } \
      if (num_size < 0) { \
        GELOGE(INTERNAL_ERROR, "node:%s, get size for output %zu failed, num=%lld", op_desc->GetName().c_str(), i, \
               num_size); \
        return INTERNAL_ERROR; \
      } \
      auto data_num = static_cast<uint64_t>(num_size); \
      GELOGI("node:%s allocate output %zu start, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE)); \
      std::unique_ptr<TYPE[]> buf(new (std::nothrow) TYPE[data_num]()); \
      if (buf == nullptr) { \
        GELOGE(MEMALLOC_FAILED, "New sizeof(T) * data_num(%zu) memory failed", \
               static_cast<size_t>(sizeof(TYPE) * data_num)); \
        return MEMALLOC_FAILED; \
      } \
      ge_tensor = MakeShared<GeTensor>(out_desc); \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, \
             data_num * sizeof(TYPE)); \
      if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
        GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
        return MEMALLOC_FAILED; \
      } \
      ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
      ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
      outputs.emplace_back(ge_tensor); \
    } else { \
      ge_tensor = outputs[i]; \
      GE_CHECK_NOTNULL(ge_tensor); \
      GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
             reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
    } \
    auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
    auto tensor_name = op_desc->GetOutputNameByIndex(i); \
    GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get output name. node = %s, index = %zu", \
                               op_desc->GetName().c_str(), i); \
    GELOGD("Successfully inserted output tensor. node = %s, index = %zu, output name = %s, addr = %p, size = %zu", \
           op_desc->GetName().c_str(), i, tensor_name.c_str(), tensor.GetData(), tensor.GetSize()); \
    named_outputs.emplace(tensor_name, tensor); \
    break; \
  }
}  // namespace
  87. namespace ge {
namespace {
// Environment variable that points at the installed OPP (operator package) root.
const char *kEnvKeyOppPath = "ASCEND_OPP_PATH";
// Location of the built-in host-cpu kernel libraries, relative to the OPP root.
const char *kHostCpuLibRelativePath = "/op_impl/built-in/host_cpu";
}  // namespace
  92. void HostCpuEngine::CloseSo() {
  93. for (auto handle : lib_handles_) {
  94. if (dlclose(handle) != 0) {
  95. GELOGW("failed to close handle, message: %s", dlerror());
  96. }
  97. }
  98. lib_handles_.clear();
  99. }
  100. ge::Status HostCpuEngine::Initialize() {
  101. std::lock_guard<std::mutex> lock(mu_);
  102. if (initialized_) {
  103. GELOGI("HostCpuEngine is already initialized");
  104. return SUCCESS;
  105. }
  106. std::string lib_dir;
  107. GE_CHK_STATUS_RET_NOLOG(GetLibPath(lib_dir));
  108. std::vector<std::string> so_paths;
  109. if (ListSoFiles(lib_dir, so_paths) == SUCCESS) {
  110. (void)LoadLibs(so_paths);
  111. }
  112. initialized_ = true;
  113. return SUCCESS;
  114. }
  115. void HostCpuEngine::Finalize() { GELOGI("start HostCpuEngine::Finalize"); }
  116. bool HostCpuEngine::CheckSupported(const string &op_type) {
  117. return OpKernelRegistry::GetInstance().IsRegistered(op_type);
  118. }
  119. Status HostCpuEngine::FindOpKernel(const ge::NodePtr &node, std::unique_ptr<HostCpuOp> &op_kernel) {
  120. std::string op_type;
  121. auto status = GetOriginalType(node, op_type);
  122. GE_CHK_BOOL_EXEC_NOLOG(status == SUCCESS, return status);
  123. auto kernel = OpKernelRegistry::GetInstance().CreateHostCpuOp(op_type);
  124. if (kernel == nullptr) {
  125. GELOGD("Op of type %s is not supported by host cpu engine", op_type.c_str());
  126. return UNSUPPORTED;
  127. }
  128. GELOGD("Successfully created op kernel. op type = %s", op_type.c_str());
  129. op_kernel = std::move(kernel);
  130. return SUCCESS;
  131. }
  132. Status HostCpuEngine::PrepareInputs(const ge::ConstOpDescPtr &op_desc, const vector<ConstGeTensorPtr> &inputs,
  133. map<std::string, const Tensor> &named_inputs) {
  134. auto num_inputs = op_desc->GetInputsSize();
  135. if (num_inputs != inputs.size()) {
  136. GELOGE(PARAM_INVALID, "Mismatching input sizes. op_desc has %zu input(s), but given %zu", num_inputs,
  137. inputs.size());
  138. return PARAM_INVALID;
  139. }
  140. for (size_t i = 0; i < num_inputs; ++i) {
  141. auto ge_tensor = inputs[i];
  142. GE_CHECK_NOTNULL(ge_tensor);
  143. auto tensor = TensorAdapter::AsTensor(*ge_tensor);
  144. auto tensor_name = op_desc->GetInputNameByIndex(i);
  145. GE_RETURN_WITH_LOG_IF_TRUE(tensor_name.empty(), "Failed to get input name. node = %s, index = %zu",
  146. op_desc->GetName().c_str(), i);
  147. GELOGD("Successfully inserted input tensor. node = %s, index = %zu, input name = %s", op_desc->GetName().c_str(), i,
  148. tensor_name.c_str());
  149. named_inputs.emplace(tensor_name, tensor);
  150. }
  151. return SUCCESS;
  152. }
  153. Status HostCpuEngine::PrepareOutputs(const ge::ConstOpDescPtr &op_desc, vector<GeTensorPtr> &outputs,
  154. map<std::string, Tensor> &named_outputs) {
  155. if (!outputs.empty() && (outputs.size() != op_desc->GetOutputsSize())) {
  156. GELOGW("size of ouputs not match, size of outputs = %zu, exactly output_num=%zu.", outputs.size(),
  157. op_desc->GetOutputsSize());
  158. outputs.clear();
  159. }
  160. bool need_create_flag = (outputs.size() != op_desc->GetOutputsSize());
  161. for (size_t i = 0; i < op_desc->GetOutputsSize(); ++i) {
  162. const auto &out_desc = op_desc->GetOutputDesc(i);
  163. switch (out_desc.GetDataType()) {
  164. CREATE_OUTPUT_CASE(DT_BOOL, bool)
  165. CREATE_OUTPUT_CASE(DT_INT8, int8_t)
  166. CREATE_OUTPUT_CASE(DT_INT16, int16_t)
  167. CREATE_OUTPUT_CASE(DT_INT32, int32_t)
  168. CREATE_OUTPUT_CASE(DT_INT64, int64_t)
  169. CREATE_OUTPUT_CASE(DT_UINT8, uint8_t)
  170. CREATE_OUTPUT_CASE(DT_UINT16, uint16_t)
  171. CREATE_OUTPUT_CASE(DT_UINT32, uint32_t)
  172. CREATE_OUTPUT_CASE(DT_UINT64, uint64_t)
  173. CREATE_OUTPUT_CASE(DT_FLOAT16, fp16_t)
  174. CREATE_OUTPUT_CASE(DT_FLOAT, float)
  175. CREATE_OUTPUT_CASE(DT_DOUBLE, double)
  176. default:
  177. GELOGE(PARAM_INVALID, "data type %s not support.",
  178. TypeUtils::DataTypeToSerialString(out_desc.GetDataType()).c_str());
  179. return PARAM_INVALID;
  180. }
  181. }
  182. return SUCCESS;
  183. }
  184. Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_kernel,
  185. map<std::string, const Tensor> &named_inputs,
  186. map<std::string, Tensor> &named_outputs) {
  187. GELOGD("Run operation on host cpu, op name: %s", op_desc->GetName().c_str());
  188. Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc);
  189. auto ret = op_kernel.Compute(op, named_inputs, named_outputs);
  190. if (ret != GRAPH_SUCCESS) {
  191. GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret);
  192. return FAILED;
  193. }
  194. op.BreakConnect();
  195. return SUCCESS;
  196. }
  197. Status HostCpuEngine::Run(NodePtr &node, const vector<ConstGeTensorPtr> &inputs, std::vector<GeTensorPtr> &outputs) {
  198. GE_CHECK_NOTNULL(node);
  199. GE_CHECK_NOTNULL(node->GetOpDesc());
  200. GELOGD("Run node by host cpu engine. node name = %s", node->GetName().c_str());
  201. std::unique_ptr<HostCpuOp> op_kernel;
  202. GE_CHK_STATUS_RET_NOLOG(FindOpKernel(node, op_kernel));
  203. std::map<std::string, const Tensor> named_inputs;
  204. std::vector<GeTensorPtr> tmp_outputs;
  205. tmp_outputs.swap(outputs);
  206. std::map<std::string, Tensor> named_outputs;
  207. auto op_desc = node->GetOpDesc();
  208. GE_CHK_STATUS_RET_NOLOG(PrepareInputs(op_desc, inputs, named_inputs));
  209. GE_CHK_STATUS_RET_NOLOG(PrepareOutputs(op_desc, tmp_outputs, named_outputs));
  210. GE_CHK_STATUS_RET_NOLOG(RunInternal(op_desc, *op_kernel, named_inputs, named_outputs));
  211. GELOGD("Run node by host cpu engine successfully. name node = %s", node->GetName().c_str());
  212. outputs.swap(tmp_outputs);
  213. return SUCCESS;
  214. }
  215. ge::Status HostCpuEngine::GetLibPath(std::string &lib_path) {
  216. GELOGI("Start to get host cpu lib path");
  217. const char *path_env = std::getenv(kEnvKeyOppPath);
  218. if (path_env != nullptr) {
  219. lib_path = path_env;
  220. if (!lib_path.empty()) {
  221. lib_path += kHostCpuLibRelativePath;
  222. GELOGI("Get host cpu so path from env: %s", lib_path.c_str());
  223. return SUCCESS;
  224. }
  225. }
  226. lib_path = PluginManager::GetPath();
  227. GELOGI("path_base is %s", lib_path.c_str());
  228. lib_path = lib_path.substr(0, lib_path.rfind('/'));
  229. lib_path = lib_path.substr(0, lib_path.rfind('/'));
  230. lib_path += "/opp";
  231. lib_path += kHostCpuLibRelativePath;
  232. GELOGI("Get host cpu so path from PluginManager::GetPath: %s", lib_path.c_str());
  233. return SUCCESS;
  234. }
  235. static int RegularFileFilterFn(const mmDirent *entry) { return entry->d_type == DT_REG; }
  236. Status HostCpuEngine::ListSoFiles(const std::string &base_dir, std::vector<std::string> &names) {
  237. std::string real_path = base_dir;
  238. GE_CHK_STATUS_RET_NOLOG(GetRealPath(real_path));
  239. real_path.push_back('/');
  240. mmDirent **entries = nullptr;
  241. auto ret = mmScandir(real_path.c_str(), &entries, RegularFileFilterFn, nullptr);
  242. if (ret < 0) {
  243. GELOGW("scan dir failed. path = %s, ret = %d", real_path.c_str(), ret);
  244. return INTERNAL_ERROR;
  245. }
  246. for (int i = 0; i < ret; ++i) {
  247. mmDirent *dir_ent = entries[i];
  248. string name = string(dir_ent->d_name);
  249. if (IsSoFile(name)) {
  250. names.emplace_back(real_path + name);
  251. }
  252. }
  253. mmScandirFree(entries, ret);
  254. GELOGI("Found %d libs to load", ret);
  255. return SUCCESS;
  256. }
  257. bool HostCpuEngine::IsSoFile(const std::string &file_name) {
  258. static const std::string so_suffix(".so");
  259. auto pos = file_name.rfind(so_suffix);
  260. if (pos == string::npos) {
  261. return false;
  262. }
  263. return pos == file_name.size() - so_suffix.size();
  264. }
  265. Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {
  266. for (auto &so_path : lib_paths) {
  267. GE_CHK_STATUS_RET_NOLOG(GetRealPath(so_path));
  268. GE_CHK_STATUS_RET_NOLOG(LoadLib(so_path));
  269. }
  270. return SUCCESS;
  271. }
  272. Status HostCpuEngine::LoadLib(const std::string &lib_path) {
  273. GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
  274. auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
  275. if (handle == nullptr) {
  276. GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror());
  277. return INTERNAL_ERROR;
  278. }
  279. auto initialize = (Status(*)(const HostCpuContext &))dlsym(handle, "Initialize");
  280. if (initialize != nullptr) {
  281. GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
  282. if (initialize(HostCpuContext()) != SUCCESS) {
  283. GELOGW("Failed to invoke function Initialize in lib: %s", lib_path.c_str());
  284. }
  285. }
  286. GELOGI("Lib: %s has been opened", lib_path.c_str());
  287. lib_handles_.emplace_back(handle);
  288. return SUCCESS;
  289. }
  290. Status HostCpuEngine::GetRealPath(std::string &path) {
  291. std::string real_path = RealPath(path.c_str());
  292. if (real_path.empty()) {
  293. GELOGW("File path %s is invalid.", path.c_str());
  294. return INTERNAL_ERROR;
  295. }
  296. path = real_path;
  297. return SUCCESS;
  298. }
  299. } // namespace ge

图引擎模块(GE)是MindSpore的一个子模块,其代码由C++实现,位于前端模块ME和底层硬件之间,起到承接作用。图引擎模块以ME下发的图作为输入,然后进行一系列的深度图优化操作,最后输出一张可以在底层硬件上高效运行的图。GE针对昇腾AI处理器的硬件结构特点,做了特定的优化工作,以此来充分发挥出昇腾AI处理器的强大算力。在进行模型训练/推理时,GE会被自动调用而用户并不感知。GE主要由GE API和GE Core两部分组成,详细的架构图如下所示