Browse Source

add error msg

tags/v1.3.0
wangxiaotian22 4 years ago
parent
commit
a0bd2ce726
38 changed files with 733 additions and 98 deletions
  1. +7
    -7
      ge/graph/build/stream_graph_optimizer.cc
  2. +1
    -1
      ge/graph/build/task_generator.cc
  3. +5
    -5
      ge/graph/execute/graph_execute.cc
  4. +19
    -0
      ge/graph/load/graph_loader.cc
  5. +61
    -0
      ge/graph/load/model_manager/cpu_queue_schedule.cc
  6. +34
    -0
      ge/graph/load/model_manager/data_dumper.cc
  7. +107
    -2
      ge/graph/load/model_manager/model_manager.cc
  8. +13
    -0
      ge/graph/load/model_manager/model_utils.cc
  9. +4
    -3
      ge/graph/load/model_manager/task_info/end_graph_task_info.cc
  10. +2
    -1
      ge/graph/load/model_manager/task_info/event_record_task_info.cc
  11. +2
    -2
      ge/graph/load/model_manager/task_info/event_wait_task_info.cc
  12. +1
    -1
      ge/graph/load/model_manager/task_info/fusion_start_task_info.cc
  13. +1
    -1
      ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc
  14. +5
    -5
      ge/graph/load/model_manager/task_info/hccl_task_info.cc
  15. +14
    -12
      ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc
  16. +38
    -38
      ge/graph/load/model_manager/task_info/kernel_task_info.cc
  17. +12
    -0
      ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc
  18. +10
    -0
      ge/graph/load/model_manager/task_info/label_set_task_info.cc
  19. +27
    -0
      ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc
  20. +10
    -0
      ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc
  21. +4
    -0
      ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc
  22. +3
    -0
      ge/graph/load/model_manager/task_info/model_exit_task_info.cc
  23. +3
    -0
      ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc
  24. +13
    -0
      ge/graph/load/model_manager/task_info/stream_active_task_info.cc
  25. +25
    -0
      ge/graph/load/model_manager/task_info/stream_switch_task_info.cc
  26. +33
    -0
      ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc
  27. +3
    -3
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc
  28. +5
    -5
      ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc
  29. +2
    -0
      ge/graph/load/model_manager/task_info/task_info.cc
  30. +8
    -0
      ge/graph/load/model_manager/tbe_handle_store.cc
  31. +2
    -0
      ge/graph/load/model_manager/zero_copy_offset.cc
  32. +4
    -0
      ge/graph/load/model_manager/zero_copy_task.cc
  33. +11
    -0
      ge/graph/manager/graph_context.cc
  34. +194
    -2
      ge/graph/manager/graph_manager.cc
  35. +4
    -0
      ge/graph/manager/util/debug.cc
  36. +37
    -2
      ge/graph/manager/util/hcom_util.cc
  37. +8
    -7
      inc/framework/common/debug/log.h
  38. +1
    -1
      parser

+ 7
- 7
ge/graph/build/stream_graph_optimizer.cc View File

@@ -125,26 +125,26 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
GE_CHECK_NOTNULL(op_desc);
int64_t stream_id = op_desc->GetStreamId();
if (static_cast<size_t>(stream_id) >= run_context.graphStreamList.size()) {
REPORT_INNER_ERROR("E19999", "Check stream_id:%ld in op:%s(%s) is bigger than run_context.graphStreamList.size():%zu "
"when %s", stream_id, op_desc->GetName().c_str(),
REPORT_INNER_ERROR("E19999", "Check stream_id:%ld in op:%s(%s) is bigger than "
"run_context.graphStreamList.size():%zu when %s", stream_id, op_desc->GetName().c_str(),
op_desc->GetType().c_str(), run_context.graphStreamList.size(), __FUNCTION__);
GELOGE(FAILED, "stream_id %ld is bigger than run_context.graphStreamList.size() %zu", stream_id,
run_context.graphStreamList.size());
return FAILED;
}
run_context.stream = run_context.graphStreamList[stream_id];
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);
REPORT_CALL_ERROR("E19999", "Call optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph "
"Optimizer num: %zu, ret: %u", subgraph->GetName().c_str(), engine_name.c_str(),
graph_optimizers.size(), ret);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Call optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph "
"Optimizer num: %zu, ret: %u", subgraph->GetName().c_str(), engine_name.c_str(),
graph_optimizers.size(), ret);
GELOGE(
ret,
"[optimizeStreamedSubGraph]: optimize streamed subgraph failed, subgraph: %s, engine_name: %s, graph "


+ 1
- 1
ge/graph/build/task_generator.cc View File

@@ -1183,7 +1183,7 @@ Status TaskGenerator::SetUnknownShapeStream(RunContext &run_context, rtStream_t
run_context.stream = stream;
rtError_t rt_ret = rtModelBindStream(run_context.model, stream, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelBindStream fail, ret:0x%X when %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtModelBindStream failed, ret:0x%X when %s", rt_ret, __FUNCTION__);
GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
GE_CHK_RT_RET(rtStreamDestroy(stream));
return FAILED;


+ 5
- 5
ge/graph/execute/graph_execute.cc View File

@@ -40,7 +40,7 @@ GraphExecutor::~GraphExecutor() {
rtError_t rt_ret;
rt_ret = rtFreeHost(buffer_addr);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFreeHost fail, ret:0x%X when %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X when %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager] subgraph free buffer failed, ret: 0x%X", rt_ret);
}
}
@@ -106,7 +106,7 @@ Status GraphExecutor::FreeInOutBuffer() {
rtError_t rt_ret;
rt_ret = rtFreeHost(*iter);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFreeHost fail, ret:0x%X when %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X when %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager] subgraph free buffer failed, ret: 0x%X", rt_ret);
(void)buffer_addr_.erase(buffer_addr_.begin(), iter);
return GE_GRAPH_FREE_FAILED;
@@ -152,7 +152,7 @@ Status GraphExecutor::MallocInOutBuffer(const std::vector<uint64_t> &buffer_size
void *tmp_buf = nullptr;
rt_ret = rtMallocHost(&tmp_buf, buffer_size[i]);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMallocHost fail, size:%lu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%lu, ret:0x%X when %s",
buffer_size[i], rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager] subgraph malloc buffer failed, ret: 0x%X", rt_ret);
return GE_GRAPH_MALLOC_FAILED;
@@ -199,7 +199,7 @@ Status GraphExecutor::PrepareInputData(const std::vector<GeTensor> &input_tensor
rtError_t rt_ret = rtMemcpy(addrVec[i], bufferSizeVec[i], in_tensor->GetData().data(),
in_tensor->GetData().size(), RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, dst_size:%lu, src_size:%zu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, dst_size:%lu, src_size:%zu, ret:0x%X when %s",
bufferSizeVec[i], in_tensor->GetData().size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
@@ -310,7 +310,7 @@ Status GraphExecutor::SyncExecuteModel(uint32_t model_id, const std::vector<GeTe
rtError_t ret_value = rtMemcpy(outBufTmp.get(), outputDataTmp.length, outputDataTmp.data, outputDataTmp.length,
RT_MEMCPY_HOST_TO_HOST);
CHECK_FALSE_EXEC(ret_value == RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, dst_size:%lu, src_size:%zu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, dst_size:%lu, src_size:%zu, ret:0x%X when %s",
outputDataTmp.length, outputDataTmp.length, ret_value, __FUNCTION__);
GELOGE(GE_GRAPH_EXECUTE_FAILED, "Call rt api rtMemcpy failed, ret: 0x%X", ret);
return GE_GRAPH_EXECUTE_FAILED);


+ 19
- 0
ge/graph/load/graph_loader.cc View File

@@ -52,11 +52,15 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
GELOGI("Load model online begin.");
rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_LOAD);
return RT_FAILED;
}
if (ge_root_model_ptr == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param ge_root_model_ptr nullptr, check invalid when GraphLoader %s",
__FUNCTION__);
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[LoadGraph] GE load graph model_ptr is nullptr.");
return GE_GRAPH_PARAM_NULLPTR;
}
@@ -71,6 +75,8 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge

rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
}
return ret;
@@ -84,6 +90,8 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge

rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
}

@@ -93,6 +101,8 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
}
@@ -121,6 +131,8 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
REPORT_INNER_ERROR("E19999", "Param key_path:%s empty or invalid, when GraphLoader %s",
key_path.c_str(), __FUNCTION__);
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -147,10 +159,12 @@ Status GraphLoader::CommandHandle(const Command &command) {
return ret;
}
} catch (std::bad_alloc &) {
REPORT_INNER_ERROR("E19999", "Bad memory allocation occur when GraphLoader %s", __FUNCTION__);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Command handle failed, bad memory allocation occur !");

return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
REPORT_INNER_ERROR("E19999", "Some exceptions occur when GraphLoader %s", __FUNCTION__);
GELOGE(FAILED, "Command handle failed, some exceptions occur !");

return FAILED;
@@ -232,6 +246,8 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn
Status GraphLoader::GetMemoryInfo(int64_t &free) {
rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
CsaInteract::GetInstance().WriteErrorCode(rt_ret, ERROR_MODULE_RUNTIME, JOBSUBSTATE_GRAPH_LOAD);
return RT_FAILED;
@@ -240,11 +256,14 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) {
size_t free_mem = 0;
rt_ret = rtMemGetInfo(&free_mem, &total_mem);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemGetInfo failed, ret:0x%X, when GraphLoader %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, ret:0x%X, when GraphLoader %s",
GetContext().DeviceId(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
}


+ 61
- 0
ge/graph/load/model_manager/cpu_queue_schedule.cc View File

@@ -51,6 +51,8 @@ CpuTaskInfo::~CpuTaskInfo() {
///
Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskModelDequeue %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -58,6 +60,8 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
args_size_ = sizeof(MbufQueueInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save in_mbuf.
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelDequeue %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -69,6 +73,8 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {
queue_info.in_mbuf = in_mbuf; // Placeholder, input mbuf addr will save to this place.
status = rtMemcpy(args_, args_size_, &queue_info, sizeof(MbufQueueInfo), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelDequeue %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -78,12 +84,16 @@ Status CpuTaskModelDequeue::Init(uint32_t queue_id, uintptr_t &in_mbuf) {

Status CpuTaskModelDequeue::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
"check invalid when CpuTaskModelDequeue %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelDequeue, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelDequeue %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelDequeue failed, status: 0x%X", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -101,6 +111,8 @@ Status CpuTaskModelDequeue::Distribute() {
///
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskZeroCopy %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -155,12 +167,16 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32

Status CpuTaskZeroCopy::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
"check invalid when CpuTaskZeroCopy %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskZeroCopy, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskZeroCopy %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ZeroCopy failed, status: 0x%X", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -199,6 +215,8 @@ CpuTaskZeroCopy::~CpuTaskZeroCopy() {
///
Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mbuf, uintptr_t &out_mbuf) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskPrepareOutput %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -206,6 +224,8 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb
args_size_ = sizeof(PrepareOutputInfo) + sizeof(uintptr_t); // sizeof(uintptr_t) for save out_mbuf.
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskPrepareOutput %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -220,6 +240,8 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb
prepare.out_mbuf = out_mbuf; // Placeholder, output mbuf addr will save to this place.
status = rtMemcpy(args_, args_size_, &prepare, sizeof(PrepareOutputInfo), RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskPrepareOutput %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -229,12 +251,16 @@ Status CpuTaskPrepareOutput::Init(uintptr_t addr, uint32_t size, uintptr_t in_mb

Status CpuTaskPrepareOutput::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
"check invalid when CpuTaskPrepareOutput %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskPrepareOutput, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskPrepareOutput %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch PrepareOutput failed, status: 0x%X", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -252,6 +278,8 @@ Status CpuTaskPrepareOutput::Distribute() {
///
Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskModelEnqueue %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -260,6 +288,8 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
args_size_ = sizeof(MbufQueueInfo);
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelEnqueue %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -270,6 +300,8 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {
queue_info.in_mbuf = out_mbuf;
status = rtMemcpy(args_, args_size_, &queue_info, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelEnqueue %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -279,12 +311,16 @@ Status CpuTaskModelEnqueue::Init(uint32_t queue_id, uintptr_t out_mbuf) {

Status CpuTaskModelEnqueue::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_ is 0 or stream_ is nullptr, arg_size:%u,"
"check invalid when CpuTaskModelEnqueue %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelEnqueue, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelEnqueue %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelEnqueue failed, status: 0x%X", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -301,6 +337,7 @@ Status CpuTaskModelEnqueue::Distribute() {
///
Status CpuTaskActiveEntry::Init(rtStream_t stream) {
if (stream == nullptr) {
REPORT_INNER_ERROR("E19999", "Param stream is nullptr, check invalid when CpuTaskActiveEntry %s", __FUNCTION__);
GELOGE(FAILED, "Task active stream not valid");
return FAILED;
}
@@ -311,12 +348,16 @@ Status CpuTaskActiveEntry::Init(rtStream_t stream) {

Status CpuTaskActiveEntry::Distribute() {
if ((active_stream_ == nullptr) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param stream is nullptr or active_stream_ is nullptr, "
"check invalid when CpuTaskActiveEntry %s", __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t ret = rtStreamActive(active_stream_, stream_);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X, when CpuTaskActiveEntry %s",
ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt StreamActive failed, ret: 0x%X", ret);
return RT_ERROR_TO_GE_STATUS(ret);
}
@@ -333,6 +374,8 @@ Status CpuTaskActiveEntry::Distribute() {
///
Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskWaitEndGraph %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -340,6 +383,8 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {
args_size_ = sizeof(model_id);
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskWaitEndGraph %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -347,6 +392,8 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {

status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskWaitEndGraph %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -356,12 +403,16 @@ Status CpuTaskWaitEndGraph::Init(uint32_t model_id) {

Status CpuTaskWaitEndGraph::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
"check invalid when CpuTaskWaitEndGraph %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskWaitEndGraph, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskWaitEndGraph %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch WaitEndGraph failed, status: 0x%X", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -378,6 +429,8 @@ Status CpuTaskWaitEndGraph::Distribute() {
///
Status CpuTaskModelRepeat::Init(uint32_t model_id) {
if ((args_ != nullptr) || (args_size_ > 0)) {
REPORT_INNER_ERROR("E19999", "Param args_ is not nullptr or args_size_:%u > 0,"
"check invalid when CpuTaskModelRepeat %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED;
}
@@ -385,6 +438,8 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) {
args_size_ = sizeof(model_id);
rtError_t status = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when CpuTaskModelRepeat %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt malloc failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -392,6 +447,8 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) {

status = rtMemcpy(args_, args_size_, &model_id, args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when CpuTaskModelRepeat %s",
args_size_, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt memcpy failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -401,12 +458,16 @@ Status CpuTaskModelRepeat::Init(uint32_t model_id) {

Status CpuTaskModelRepeat::Distribute() {
if ((args_ == nullptr) || (args_size_ == 0) || (stream_ == nullptr)) {
REPORT_INNER_ERROR("E19999", "Param args_ is nullptr or args_size_:%u is 0 or stream_ is nullptr,"
"check invalid when CpuTaskModelRepeat %s", args_size_, __FUNCTION__);
GELOGE(FAILED, "Task not initialized, distribute failed, size: %u", args_size_);
return FAILED;
}

rtError_t status = rtCpuKernelLaunch(nullptr, kCpuTaskModelRepeat, kCoreDim, args_, args_size_, nullptr, stream_);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCpuKernelLaunch failed, ret:0x%X, when CpuTaskModelRepeat %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt CpuKernelLaunch ModelRepeat failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}


+ 34
- 0
ge/graph/load/model_manager/data_dumper.cc View File

@@ -325,6 +325,7 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size fail when DataDumper %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "Get output size filed");
return PARAM_INVALID;
}
@@ -387,6 +388,9 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicp
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_descs.size() != output_addrs.size()) {
REPORT_INNER_ERROR("E19999", "output_desc size:%zu != output addr size:%zu in op:%s(%s) when DataDumper %s",
output_descs.size(), output_addrs.size(),
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
inner_dump_info.op->GetName().c_str(), output_descs.size());
return PARAM_INVALID;
@@ -411,6 +415,9 @@ Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicp
GELOGI("[L1Fusion] DumpOutputWithTask[%s] output[%zu] is l1 addr.", inner_dump_info.op->GetName().c_str(), i);
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get output tensor size fail in op:%s(%s), index:%zu, when DataDumper %s",
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i,
__FUNCTION__);
GELOGE(PARAM_INVALID, "Get output size failed.");
return PARAM_INVALID;
}
@@ -438,6 +445,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_tensor == nullptr) {
REPORT_INNER_ERROR("E19999", "output_desc tensor is nullptr in op:%s(%s), index:%u, "
"check invalid when DataDumper %s",
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(),
inner_dump_info.output_anchor_index, __FUNCTION__);
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
inner_dump_info.op->GetOutputsSize());
return PARAM_INVALID;
@@ -461,6 +472,9 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
output.set_original_output_data_type(static_cast<int32_t>(output_tensor->GetOriginDataType()));
// due to lhisi virtual addr bug, cannot use args now
if (inner_dump_info.output_anchor_index >= static_cast<int>(output_addrs.size())) {
REPORT_INNER_ERROR("E19999", "output_anchor_index:%u >= output addr size:%zu in op:%s(%s), "
"check invalid when DataDumper %s", inner_dump_info.output_anchor_index, output_addrs.size(),
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Index is out of range.");
return FAILED;
}
@@ -487,6 +501,7 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
} else if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), input_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size fail when DataDumper %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "Get input size filed");
return PARAM_INVALID;
}
@@ -542,6 +557,9 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
if (input_descs.size() != input_addrs.size()) {
REPORT_INNER_ERROR("E19999", "input_desc size:%zu != input addr size:%zu in op:%s(%s) when DataDumper %s",
input_descs.size(), input_addrs.size(),
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
inner_dump_info.op->GetName().c_str(), input_descs.size());
return PARAM_INVALID;
@@ -567,6 +585,9 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::
if (AttrUtils::GetInt(input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
} else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get input tensor size fail in op:%s(%s), index:%zu, when DataDumper %s",
inner_dump_info.op->GetName().c_str(), inner_dump_info.op->GetType().c_str(), i,
__FUNCTION__);
GELOGE(PARAM_INVALID, "Get input size failed.");
return PARAM_INVALID;
}
@@ -595,6 +616,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in
size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_str);
if (!ret || proto_size == 0) {
REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return PARAM_INVALID;
}
@@ -606,6 +628,8 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in

rtError_t rt_ret = rtMalloc(&dev_mem_load_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when DataDumper %s",
proto_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -613,12 +637,15 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in

rt_ret = rtMemcpy(dev_mem_load_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when DataDumper %s",
proto_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtDatadumpInfoLoad(dev_mem_load_, proto_size);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X, when DataDumper %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -633,6 +660,7 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_str);
if (!ret || proto_size == 0) {
REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "Protobuf SerializeToString failed, proto size %zu.", proto_size);
return PARAM_INVALID;
}
@@ -644,6 +672,8 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_

rtError_t rt_ret = rtMalloc(&dev_mem_unload_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when DataDumper %s",
proto_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -651,12 +681,15 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_

rt_ret = rtMemcpy(dev_mem_unload_, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when DataDumper %s",
proto_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtDatadumpInfoLoad(dev_mem_unload_, proto_size);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDatadumpInfoLoad failed, ret:0x%X, when DataDumper %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtDatadumpInfoLoad failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -941,6 +974,7 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
// Serialize dump_data into a heap buffer of proto_size bytes.
std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
// new (std::nothrow) yields nullptr on allocation failure. Short-circuit in that case so
// SerializeToArray is never handed a null buffer; the failure is then reported through the
// same PARAM_INVALID path as a serialization error.
bool ret = (proto_msg != nullptr) && dump_data.SerializeToArray(proto_msg.get(), proto_size);
if (!ret || proto_size == 0) {
  REPORT_INNER_ERROR("E19999", "Serialize proto to string fail when DataDumper %s", __FUNCTION__);
  GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
  return PARAM_INVALID;
}


+ 107
- 2
ge/graph/load/model_manager/model_manager.cc View File

@@ -99,11 +99,17 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u

auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size());
rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret: 0x%X when ModelManager %s",
kernel_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), kernel_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X when ModelManager %s",
kernel_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)
uint64_t kernel_id_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(aicpu_kernel_addr));
param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr;
@@ -114,6 +120,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u

rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret: 0x%X when ModelManager %s",
sizeof(STR_FWK_OP_KERNEL), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -122,6 +130,8 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
rt_ret =
rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), &param_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret: 0x%X when ModelManager %s",
sizeof(STR_FWK_OP_KERNEL), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
@@ -131,6 +141,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
rtStream_t stream = nullptr;
rt_ret = rtStreamCreate(&stream, 0);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamCreate failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
@@ -139,6 +150,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u

rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtKernelLaunchEx failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
@@ -147,6 +159,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}
rt_ret = rtStreamSynchronize(stream);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret);
GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr)));
GE_CHK_RT(rtFree(devicebase));
@@ -156,6 +169,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
if (aicpu_kernel_addr != nullptr) {
rt_ret = rtFree(aicpu_kernel_addr);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret);
GE_CHK_RT(rtFree(devicebase));
GE_CHK_RT(rtStreamDestroy(stream));
@@ -164,12 +178,14 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}
rt_ret = rtFree(devicebase);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret);
GE_CHK_RT(rtStreamDestroy(stream));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtStreamDestroy(stream);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamDestroy failed, ret: 0x%X when ModelManager %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtStreamDestroy failed. ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -216,6 +232,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
REPORT_INNER_ERROR("E19999", "Param model_id:%u can't find in model_map, check invalid when ModelManager %s",
model_id, __FUNCTION__);
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
@@ -233,6 +251,8 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id,
sub_model_id);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Call KernelLaunchEx fail, model_id:%u, sub_model_id:%u, session_id:%lu, "
"when ModelManager %s", model_id, sub_model_id, session_id, __FUNCTION__);
GELOGE(FAILED, "Destroy aicpu kernel failed.");
return FAILED;
}
@@ -289,6 +309,8 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
auto root_graph = ge_root_model.GetRootGraph();
if (root_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "root graph in param ge_root_model is nullptr, model_id:%u, "
"check invalid when ModelManager %s", ge_root_model.GetModelId(), __FUNCTION__);
GELOGE(FAILED, "no model on root model");
return false;
}
@@ -317,6 +339,7 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
mmTimespec timespec = mmGetTickCount();
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, listener);
if (davinci_model == nullptr) {
REPORT_CALL_ERROR("E19999", "New DavinciModel fail, model_id:%u, when ModelManager %s", model_id, __FUNCTION__);
GELOGE(FAILED, "davinci_model is nullptr");
return FAILED;
}
@@ -381,6 +404,8 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
REPORT_INNER_ERROR("E19999", "model_id:%u not exist in model_map, check invalid when ModelManager %s",
id, __FUNCTION__);
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
@@ -427,6 +452,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d

Status status = data_wrap->Init(input_data, output_data);
if (status != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Init InputDataWrapper failed, input data index: %u, when ModelManager %s",
input_data.index, __FUNCTION__);
GELOGE(domi::PUSH_DATA_FAILED, "Init InputDataWrapper failed, input data index: %u.", input_data.index);
return domi::PUSH_DATA_FAILED;
}
@@ -443,6 +470,8 @@ Status ModelManager::DataInput(const InputData &input_data, OutputData &output_d
DataInputer *inputer = model->GetDataInputer();
GE_CHECK_NOTNULL(inputer);
if (inputer->Push(data_wrap) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "DataInputer queue is full, please call again later, model_id %u, when ModelManager %s",
model_id, __FUNCTION__);
GELOGE(domi::DATA_QUEUE_ISFULL, "Data queue is full, please call again later, model_id %u ", model_id);
return domi::DATA_QUEUE_ISFULL;
}
@@ -456,6 +485,9 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
vector<int32_t> &cur_dynamic_dims) {
GELOGD("Start get cur dynamic dims.");
if (user_real_input_dims.size() != user_input_dims.size()) {
REPORT_INNER_ERROR("E19999", "Param user_real_input_dims.size:%zu != user_input_dims.size:%zu, "
"check invalid when ModelManager %s",
user_real_input_dims.size(), user_input_dims.size(), __FUNCTION__);
GELOGE(INTERNAL_ERROR,
"The input count of user: %zu should be equal to the data count of graph: %zu",
user_real_input_dims.size(), user_input_dims.size());
@@ -464,6 +496,9 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_

for (size_t i = 0; i < user_input_dims.size(); ++i) {
if (user_real_input_dims[i].size() != user_input_dims[i].second.size()) {
REPORT_INNER_ERROR("E19999", "Param user_real_input_dims[%zu].size:%zu != user_input_dims[%zu].size:%zu, "
"check invalid when ModelManager %s", i, user_real_input_dims[i].size(),
i, user_input_dims[i].second.size(), __FUNCTION__);
GELOGE(INTERNAL_ERROR,
"The shape size: %zu of dynamic input: %s should be equal to the shape size of input shape: %zu.",
user_real_input_dims[i].size(), user_input_dims[i].first.c_str(), user_input_dims[i].second.size());
@@ -485,6 +520,8 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
}
}
if (!cur_dynamic_dims_valid) {
REPORT_INNER_ERROR("E19999", "cur dynamic dims is %s, not exist in options, check invalid "
"when ModelManager %s", formats::JoinToString(cur_dynamic_dims).c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.",
formats::JoinToString(cur_dynamic_dims).c_str());
return INTERNAL_ERROR;
@@ -636,6 +673,8 @@ Status ModelManager::HandleCommand(const Command &command) {

auto iter = cmds.find(command.cmd_type);
if (iter == cmds.end()) {
REPORT_INNER_ERROR("E19999", "Unsupported command:%s check when ModelManager %s",
command.cmd_type.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Unsupported command: %s", command.cmd_type.c_str());
return PARAM_INVALID;
} else {
@@ -646,6 +685,9 @@ Status ModelManager::HandleCommand(const Command &command) {
Status ModelManager::GetModelByCmd(const Command &command,
std::shared_ptr<DavinciModel> &davinci_model) {
if (command.cmd_params.size() < kCmdParSize) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < kCmdParSize:%u, command_type:%s, "
"check invalid when ModelManager %s", command.cmd_params.size(), kCmdParSize,
command.cmd_type.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.",
command.cmd_type.c_str());
return PARAM_INVALID;
@@ -658,12 +700,18 @@ Status ModelManager::GetModelByCmd(const Command &command,
try {
model_id = std::stoi(value);
} catch (std::invalid_argument &) {
REPORT_INNER_ERROR("E19999", "%s param:%s, check invalid when ModelManager %s", PROFILE_MODEL_ID.c_str(),
value.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Model id: %s is invalid.", value.c_str());
return PARAM_INVALID;
} catch (std::out_of_range &) {
REPORT_INNER_ERROR("E19999", "%s param:%s, check out of range when ModelManager %s", PROFILE_MODEL_ID.c_str(),
value.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Model id: %s is out of range.", value.c_str());
return PARAM_INVALID;
} catch (...) {
REPORT_INNER_ERROR("E19999", "%s param:%s, check cannot change to int when ModelManager %s",
PROFILE_MODEL_ID.c_str(), value.c_str(), __FUNCTION__);
GELOGE(FAILED, "Model id: %s cannot change to int.", value.c_str());
return FAILED;
}
@@ -672,10 +720,14 @@ Status ModelManager::GetModelByCmd(const Command &command,
GE_CHECK_NOTNULL(model_manager);
davinci_model = model_manager->GetModel(static_cast<uint32_t>(model_id));
if (davinci_model == nullptr) {
  // model_id is the signed value produced by std::stoi (it is cast to uint32_t only for the
  // lookup), so it is printed with %d in both messages to keep them consistent.
  REPORT_INNER_ERROR("E19999", "GetModel from model_manager fail, model_id:%d, when ModelManager %s",
                     model_id, __FUNCTION__);
  GELOGE(FAILED, "Model id: %d is invalid or model is not loaded.", model_id);
  return FAILED;
}
} else {
// The first command parameter is not PROFILE_MODEL_ID, so no model id can be extracted.
REPORT_INNER_ERROR("E19999", "First cmd_param not %s, check invalid when ModelManager %s",
                   PROFILE_MODEL_ID.c_str(), __FUNCTION__);
GELOGE(FAILED, "The model_id parameter is not found in the command.");
return FAILED;
}
@@ -739,10 +791,14 @@ Status ModelManager::HandleProfFinalizeCommand(const Command &command) {
*/
Status ModelManager::HandleProfStartCommand(const Command &command) {
if (command.cmd_params.size() < kProfStartCmdParaSize) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < %zu, check invalid when ModelManager %s",
command.cmd_params.size(), kProfStartCmdParaSize, __FUNCTION__);
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}
if (command.cmd_params.size() > kProfCmdParaMaxSize) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu > %zu, check invalid when ModelManager %s",
command.cmd_params.size(), kProfCmdParaMaxSize, __FUNCTION__);
GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size());
return PARAM_INVALID;
}
@@ -765,10 +821,14 @@ Status ModelManager::HandleProfStartCommand(const Command &command) {

Status ModelManager::HandleProfStopCommand(const Command &command) {
if (command.cmd_params.size() < kProfStartCmdParaSize) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu < %zu, check invalid when ModelManager %s",
command.cmd_params.size(), kProfStartCmdParaSize, __FUNCTION__);
GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2.");
return PARAM_INVALID;
}
if (command.cmd_params.size() > kProfCmdParaMaxSize) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu > %zu, check invalid when ModelManager %s",
command.cmd_params.size(), kProfCmdParaMaxSize, __FUNCTION__);
GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size());
return PARAM_INVALID;
}
@@ -794,6 +854,8 @@ static Status ParserPara(const Command &command, const string &dump_key, string
if (iter != command.cmd_params.end()) {
++iter;
if (iter == command.cmd_params.end()) {
REPORT_INNER_ERROR("E19999", "dump_key:%s can't find in command.param, check invalid when ModelManager %s",
dump_key.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Invalid access.");
return PARAM_INVALID;
}
@@ -804,6 +866,8 @@ static Status ParserPara(const Command &command, const string &dump_key, string

Status ModelManager::HandleDumpCommand(const Command &command) {
if (command.cmd_params.size() % kDumpCmdPairSize != 0) {
REPORT_INNER_ERROR("E19999", "command.cmd_params.size:%zu MOD 2 != 0, check invalid when ModelManager %s",
command.cmd_params.size(), __FUNCTION__);
GELOGE(PARAM_INVALID, "When the cmd_type is 'dump', the size of cmd_params must be a even number.");
return PARAM_INVALID;
}
@@ -1020,6 +1084,7 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {

mmTimeval tv;
if (mmGetTimeOfDay(&tv, nullptr) != 0) {
REPORT_CALL_ERROR("E19999", "Call mmGetTimeOfDay fail when ModelManager %s", __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
@@ -1064,6 +1129,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GeModelPtr ge_model = model_helper.GetGeModel();
shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model.priority, listener);
if (davinci_model == nullptr) {
REPORT_CALL_ERROR("E19999", "New DavinciModel fail when ModelManager %s", __FUNCTION__);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
@@ -1079,6 +1145,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
// Two distinct failure causes share this branch: the runtime call failed, or it reported a
// negative device id. Report device_id as well so the second cause is visible in the error.
// NOTE(review): when only device_id < 0 triggers this branch, rt_ret is RT_ERROR_NONE and the
// returned status is derived from it - confirm RT_ERROR_TO_GE_STATUS does not map that to success.
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
  REPORT_CALL_ERROR("E19999", "Call rtGetDevice failed, ret = 0x%X, device_id = %d, when ModelManager %s",
                    rt_ret, device_id, __FUNCTION__);
  GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
  return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -1137,6 +1204,7 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d

shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(model_data.priority, nullptr);
if (davinci_model == nullptr) {
REPORT_CALL_ERROR("E19999", "New DavinciModel fail when ModelManager %s", __FUNCTION__);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create model failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
@@ -1257,6 +1325,7 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_
rtContext_t rt_cur_ctx = nullptr;
auto rt_error = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_error != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X, when ModelManager %s", rt_error, __FUNCTION__);
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_error));
return RT_FAILED;
}
@@ -1292,6 +1361,7 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
rtContext_t rt_cur_ctx = nullptr;
auto rt_error = rtCtxGetCurrent(&rt_cur_ctx);
if (rt_error != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCtxGetCurrent failed, ret = 0x%X, when ModelManager %s", rt_error, __FUNCTION__);
GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_error));
return RT_FAILED;
}
@@ -1317,12 +1387,16 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {

status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret = 0x%X, when ModelManager %s",
aicpu_data_length, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
allocated_mem.push_back(d_aicpu_data);
status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
so_name.size(), status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1345,6 +1419,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size();
status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X, when ModelManager %s",
args_size, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1359,6 +1435,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs);
status = rtMalloc(&batch_args, batch_args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X, when ModelManager %s",
batch_args_size, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1371,6 +1449,8 @@ Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {

status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X, when ModelManager %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1415,6 +1495,8 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me

auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data);
if (partition_table->num == 1) {
REPORT_INNER_ERROR("E19999", "partition_table num in model_data is 1, check invalid when ModelManager %s",
__FUNCTION__);
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "om model is error,please use executable om model");
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -1481,6 +1563,8 @@ ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector<GeTens
vector<GeTensor> &outputs) {
auto model = GetHybridModel(model_id);
// GetHybridModel returned nullptr: no hybrid model is registered under model_id.
if (model == nullptr) {
  // The reported message now describes this failure; the previous text was copied from the
  // partition-table check in GetModelMemAndWeightSize and did not mention the model id.
  REPORT_INNER_ERROR("E19999", "Hybrid model not found, model_id:%u, check invalid when ModelManager %s",
                     model_id, __FUNCTION__);
  GELOGE(FAILED, "Hybrid model not found. model id = %u.", model_id);
  return FAILED;
}
@@ -1509,6 +1593,8 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
if (iter->second == "1") {
rtError_t rt_ret = rtSetTaskFailCallback(reinterpret_cast<rtTaskFailCallback>(ExceptionCallback));
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetTaskFailCallback fail, ret = 0x%X, when ModelManager %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtSetTaskFailCallback failed");
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -1556,6 +1642,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
// malloc sysOpInfoList in SysOpCheckInfo
status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
op_nums * sizeof(SysOpInfo), status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1564,6 +1652,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
// malloc sysOpInfoList in SysOpCheckResp
status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
op_nums * sizeof(SysOpInfo), status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1572,6 +1662,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
// malloc returnCodeList in SysOpCheckResp
status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
op_nums * sizeof(ReturnCode), status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1583,6 +1675,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
void *d_op_type_name = nullptr;
status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
// Device allocation for the op type name failed: report the requested size and runtime status.
if (status != RT_ERROR_NONE) {
  // length() is a size_t, so it is printed with %zu like the other rtMalloc reports in this file.
  REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
                    op_type.length(), status, __FUNCTION__);
  GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
  return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1600,6 +1694,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
void *d_op_type_name = nullptr;
status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
// Device allocation for the op type name failed: report the requested size and runtime status.
if (status != RT_ERROR_NONE) {
  // Report the same expression that was passed to rtMalloc (op_type.size()); it is a size_t,
  // so it is printed with %zu like the other rtMalloc reports in this file.
  REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret = 0x%X, when ModelManager %s",
                    op_type.size(), status, __FUNCTION__);
  GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
  return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1628,6 +1724,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);
status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret = 0x%X, when ModelManager %s",
args_size, status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
return RT_ERROR_TO_GE_STATUS(status);
}
@@ -1643,6 +1741,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op

status = rtStreamSynchronize(stream);
if (status != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamSynchronize fail, ret = 0x%X, when ModelManager %s",
status, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
GE_CHK_RT(rtStreamDestroy(stream));
return RT_ERROR_TO_GE_STATUS(status);
@@ -1675,6 +1775,9 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
REPORT_INNER_ERROR("E19999", "res_ret_code_list.size:%zu res_aicpu_op_info_list.size:%zu res_op_nums:%lu "
"not equal, check invalid when ModelManager %s",
res_ret_code_list.size(), res_aicpu_op_info_list.size(), res_op_nums, __FUNCTION__);
GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;
@@ -1698,6 +1801,8 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op
"<0: op_type, 1: format, 2: datatype> \n";
}
fail_reason += "not support.";
REPORT_INNER_ERROR("E19999", "Check aicpu op_type failed, details:%s when ModelManager %s",
fail_reason.c_str(), __FUNCTION__);
GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
GE_CHK_RT(rtStreamDestroy(stream));
return FAILED;


+ 13
- 0
ge/graph/load/model_manager/model_utils.cc View File

@@ -25,6 +25,9 @@
#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \
do { \
if (SIZE <= static_cast<uint64_t>(OFFSET)) { \
REPORT_INNER_ERROR("E19999", \
"Node:%s(%s) offset:%ld out of range size:%lu, check invalid when ModelUtils %s", \
OP->GetName().c_str(), OP->GetType().c_str(), OFFSET, SIZE, __FUNCTION__); \
GELOGE(OUT_OF_MEMORY, "Node: %s, memory out of range[%lu: %ld]", OP->GetName().c_str(), SIZE, OFFSET); \
return {}; \
} \
@@ -305,6 +308,9 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
vector<int64_t> v_memory_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, v_memory_type);
if (has_mem_type_attr && (v_memory_type.size() != inputs_size)) {
REPORT_INNER_ERROR("E19999", "Attr:%s, memory_type.size:%zu != input_desc.size:%zu, op:%s(%s), check invalid "
"when ModelUtils %s", ATTR_NAME_INPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), inputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Fusion: check input size failed, op: %s, input v_memory_type size: %zu input numbers: %zu",
op_desc->GetName().c_str(), v_memory_type.size(), inputs_size);
return v_input_data_addr;
@@ -384,6 +390,7 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
switch (mem_type) {
case RT_MEMORY_RDMA_HBM:
if (offset < 0) {
REPORT_INNER_ERROR("E19999", "Param offset:%ld < 0, check invalid when ModelUtils %s", offset, __FUNCTION__);
GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p",
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)));
return PARAM_INVALID;
@@ -395,6 +402,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
var_addr = model_param.var_base + offset - model_param.logic_var_base;
break;
default:
REPORT_INNER_ERROR("E19999", "Get mem_type:%d for offset:%ld is unsupported, check invalid when ModelUtils %s",
mem_type, offset, __FUNCTION__);
GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type);
return PARAM_INVALID;
}
@@ -420,6 +429,9 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
vector<int64_t> v_memory_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, v_memory_type);
if (has_mem_type_attr && (v_memory_type.size() != outputs_size)) {
REPORT_INNER_ERROR("E19999", "Attr:%s, memory_type.size:%zu != output_desc.size:%zu, op:%s(%s), check invalid "
"when ModelUtils %s", ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), v_memory_type.size(), outputs_size,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID,
"Fusion: check output size failed, op: %s, output v_memory_type size: %lu output numbers: %zu",
op_desc->GetName().c_str(), v_memory_type.size(), outputs_size);
@@ -568,6 +580,7 @@ Status ModelUtils::GetRtAddress(const RuntimeParam &param, uintptr_t logic_addr,
param.var_size);
} else if (logic_addr != 0) {
mem_addr = nullptr;
REPORT_INNER_ERROR("E19999", "Check param logic addr:0x%lx abnormal when ModelUtils %s", logic_addr, __FUNCTION__);
GELOGE(PARAM_INVALID, "The logic addr:0x%lx is abnormal", logic_addr);
return PARAM_INVALID;
}


+ 4
- 3
ge/graph/load/model_manager/task_info/end_graph_task_info.cc View File

@@ -53,7 +53,7 @@ Status EndGraphTaskInfo::Distribute() {
GELOGI("Start to call rtEndGraphEx");
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtEndGraphEx fail ret:0x%X, when EndGraphTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtEndGraphEx failed, ret:0x%X, when EndGraphTaskInfo %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtEndGraphEx failed, ret: 0x%x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -61,7 +61,7 @@ Status EndGraphTaskInfo::Distribute() {
GELOGI("Start to call rtEndGraph");
rtError_t rt_ret = rtEndGraph(model_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtEndGraph fail ret:0x%X, when EndGraphTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtEndGraph failed, ret:0x%X, when EndGraphTaskInfo %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtEndGraph failed, ret: 0x%x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -71,7 +71,8 @@ Status EndGraphTaskInfo::Distribute() {
uint32_t stream_id = 0;
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId fail ret:0x%X, when EndGraphTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X, when EndGraphTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 2
- 1
ge/graph/load/model_manager/task_info/event_record_task_info.cc View File

@@ -50,7 +50,8 @@ Status EventRecordTaskInfo::Distribute() {
GELOGI("EventRecordTaskInfo Distribute Start.");
rtError_t rt_ret = rtEventRecord(event_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtEventRecord fail ret:0x%X, when EventRecordTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtEventRecord failed, ret:0x%X, when EventRecordTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 2
- 2
ge/graph/load/model_manager/task_info/event_wait_task_info.cc View File

@@ -51,7 +51,7 @@ Status EventWaitTaskInfo::Distribute() {
GELOGI("EventWaitTaskInfo Distribute Start.");
rtError_t rt_ret = rtStreamWaitEvent(stream_, event_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent fail ret:0x%X, when EventWaitTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtStreamWaitEvent failed, ret:0x%X, when EventWaitTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -59,7 +59,7 @@ Status EventWaitTaskInfo::Distribute() {

rt_ret = rtEventReset(event_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtEventReset fail ret:0x%X, when EventWaitTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtEventReset failed, ret:0x%X, when EventWaitTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);


+ 1
- 1
ge/graph/load/model_manager/task_info/fusion_start_task_info.cc View File

@@ -40,7 +40,7 @@ Status FusionStartTaskInfo::Distribute() {
GELOGI("FusionStartTaskInfo Distribute Start.");
rtError_t rt_ret = rtKernelFusionStart(stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelFusionStart fail ret:0x%X, when FusionStartTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtKernelFusionStart failed, ret:0x%X, when FusionStartTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);


+ 1
- 1
ge/graph/load/model_manager/task_info/fusion_stop_task_info.cc View File

@@ -40,7 +40,7 @@ Status FusionStopTaskInfo::Distribute() {
GELOGI("FusionStopTaskInfo Distribute Start.");
rtError_t rt_ret = rtKernelFusionEnd(stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelFusionEnd fail ret:0x%X, when FusionStopTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtKernelFusionEnd failed, ret:0x%X, when FusionStopTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);


+ 5
- 5
ge/graph/load/model_manager/task_info/hccl_task_info.cc View File

@@ -30,7 +30,7 @@ HcclTaskInfo::~HcclTaskInfo() {
if (private_def_ != nullptr) {
rtError_t ret = rtFreeHost(private_def_);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFreeHost fail ret:0x%X, when HcclTaskInfo %s", ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtFreeHost failed, ret:0x%X, when HcclTaskInfo %s", ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtFree Fail, ret = 0x%X.", ret);
}
private_def_ = nullptr;
@@ -179,7 +179,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode
rtError_t rt_ret =
rtStreamCreateWithFlags(&stream, davinci_model->Priority(), RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamCreateWithFlags fail ret:0x%X, stream_idx:%ld, stream_num:%ld, "
REPORT_CALL_ERROR("E19999", "Call rtStreamCreateWithFlags failed, ret:0x%X, stream_idx:%ld, stream_num:%ld, "
"when HcclTaskInfo %s", rt_ret, i, stream_num, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -187,7 +187,7 @@ Status HcclTaskInfo::CreateStream(int64_t stream_num, DavinciModel *davinci_mode
// Create slave stream, inactive by default, activated by hccl
rt_ret = rtModelBindStream(davinci_model->GetRtModelHandle(), stream, RT_MODEL_WAIT_ACTIVE_STREAM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelBindStream fail ret:0x%X, stream_idx:%ld, stream_num:%ld, "
REPORT_CALL_ERROR("E19999", "Call rtModelBindStream failed, ret:0x%X, stream_idx:%ld, stream_num:%ld, "
"when HcclTaskInfo %s", rt_ret, i, stream_num, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
(void)rtStreamDestroy(stream);
@@ -332,7 +332,7 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) {
private_def_len_ = private_def_temp.size();
rtError_t ret = rtMallocHost(&private_def_, private_def_len_);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMallocHost fail ret:0x%X, size:%u, when HcclTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, ret:0x%X, size:%u, when HcclTaskInfo %s",
ret, private_def_len_, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMallocHost Fail, ret = 0x%X.", ret);
return;
@@ -341,7 +341,7 @@ void HcclTaskInfo::GetPrivateDefByTaskDef(const domi::TaskDef &task) {
ret = rtMemcpy(private_def_, private_def_len_, task.private_def().c_str(), private_def_len_,
RT_MEMCPY_HOST_TO_HOST);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail ret:0x%X, size:%u, when HcclTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u, when HcclTaskInfo %s",
ret, private_def_len_, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMemcpy Fail, ret = 0x%X.", ret);
return;


+ 14
- 12
ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc View File

@@ -75,14 +75,14 @@ Status KernelExTaskInfo::InitTaskExtInfo(const std::string &ext_info, const OpDe
}
auto rt_ret = rtMalloc(&ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelExTaskInfo %s",
ext_info.size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtMemcpy(ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(),
ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelExTaskInfo %s",
ext_handle->GetExtInfoLen(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -169,7 +169,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
void *workspace_base_addr = nullptr;
rtError_t rt_ret = rtMalloc(&workspace_base_addr, kernel_ex_def.task_info_size(), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret:0x%X, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when KernelExTaskInfo %s",
kernel_ex_def.task_info_size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret););
@@ -183,7 +183,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin

rt_ret = rtMalloc(&kernel_buf_, kernel_buf_size_, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail ret:0x%X, size:%u, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%u, when KernelExTaskInfo %s",
rt_ret, kernel_buf_size_, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -191,7 +191,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
rt_ret = rtMemcpy(kernel_buf_, kernel_buf_size_, static_cast<void *>(&fwk_op_kernel), kernel_buf_size_,
RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail ret:0x%X, size:%u, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%u, when KernelExTaskInfo %s",
rt_ret, kernel_buf_size_, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -228,14 +228,14 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
if (addrs_size > 0) {
rtError_t rt_ret = rtMalloc(&input_output_addr_, addrs_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail ret:0x%X, size:%lu, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%lu, when KernelExTaskInfo %s",
rt_ret, addrs_size, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

rt_ret = rtMemcpy(input_output_addr_, addrs_size, io_addrs.data(), addrs_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail ret:0x%X, size:%lu, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%lu, when KernelExTaskInfo %s",
rt_ret, addrs_size, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -257,7 +257,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
// 4. Return result
rtError_t rt_ret = rtMalloc(&kernel_buf_, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail ret:0x%X, size:%zu, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, ret:0x%X, size:%zu, when KernelExTaskInfo %s",
rt_ret, sizeof(STR_FWK_OP_KERNEL), __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -265,7 +265,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
rt_ret = rtMemcpy(kernel_buf_, sizeof(STR_FWK_OP_KERNEL), static_cast<void *>(&fwk_op_kernel),
sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail ret:0x%X, size:%zu, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%zu, when KernelExTaskInfo %s",
rt_ret, sizeof(STR_FWK_OP_KERNEL), __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
@@ -397,7 +397,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const
rtError_t rt_ret = rtMemcpy(workspace_data_addrs[0], kernel_def.task_info_size(), kernel_def.task_info().data(),
kernel_def.task_info_size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail ret:0x%X, size:%d, when KernelExTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, ret:0x%X, size:%d, when KernelExTaskInfo %s",
rt_ret, kernel_def.task_info_size(), __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -410,7 +410,8 @@ Status KernelExTaskInfo::Distribute() {
GELOGI("KernelExTaskInfo Distribute Start.");
rtError_t rt_ret = rtKernelLaunchEx(kernel_buf_, kernel_buf_size_, dump_flag_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx fail ret:0x%X when KernelExTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchEx failed, ret:0x%X when KernelExTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -425,7 +426,8 @@ Status KernelExTaskInfo::Distribute() {
uint32_t stream_id = 0; // for profiling
rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId fail ret:0x%X when KernelExTaskInfo %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X when KernelExTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 38
- 38
ge/graph/load/model_manager/task_info/kernel_task_info.cc View File

@@ -94,7 +94,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName fail for op:%s(%s), "
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed for op:%s(%s), "
"bin_file_key:%s, ret:0x%X, when KernelTaskInfo %s",
op_desc_->GetName().c_str(), op_desc_->GetType().c_str(),
kernel_def.stub_func().c_str(), rt_ret, __FUNCTION__);
@@ -108,7 +108,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName fail for op:%s(%s), "
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed for op:%s(%s), "
"bin_file_key:%s, ret:0x%X, when KernelTaskInfo %s",
op_desc_->GetName().c_str(), op_desc_->GetType().c_str(),
bin_file_key, rt_ret, __FUNCTION__);
@@ -181,7 +181,7 @@ void KernelTaskInfo::UpdateSKTTaskId() {
if (davinci_model_ != nullptr) {
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return;
@@ -201,7 +201,7 @@ void KernelTaskInfo::UpdateTaskId() {
if (davinci_model_ != nullptr) {
rtError_t rt_ret = rtModelGetTaskId(davinci_model_->GetRtModelHandle(), &task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtModelGetTaskId failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return;
@@ -258,7 +258,7 @@ Status KernelTaskInfo::SuperKernelLaunch() {
static_cast<rtSmDesc_t *>(skt_info.last_sm_desc), skt_info.last_stream,
skt_info.last_dump_flag);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "SuperKernelLaunch: Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -396,7 +396,7 @@ Status KernelTaskInfo::SuperKernelDistribute() {
rtError_t rt_ret = rtKernelLaunchWithFlag(stub_func_, block_dim_, args_, args_size_,
static_cast<rtSmDesc_t *>(sm_desc_), stream_, dump_flag_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return rt_ret;
@@ -462,7 +462,7 @@ Status KernelTaskInfo::Distribute() {
}
}
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag or rtCpuKernelLaunchWithFlag fail, "
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag or rtCpuKernelLaunchWithFlag failed, "
"ret:0x%X, when KernelTaskInfo %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -507,7 +507,7 @@ Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) {
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -557,7 +557,7 @@ Status KernelTaskInfo::Release() {

ret = (sm_desc_ != nullptr) ? rtMemFreeManaged(sm_desc_) : RT_ERROR_NONE;
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemFreeManaged fail, ret:0x%X, when KernelTaskInfo %s", ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtMemFreeManaged failed, ret:0x%X, when KernelTaskInfo %s", ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", static_cast<int>(ret));
return RT_ERROR_TO_GE_STATUS(ret);
}
@@ -588,7 +588,7 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {

rtError_t rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -596,7 +596,7 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {

rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sm_desc.size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -688,7 +688,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
// malloc args memory
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -697,7 +697,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
// copy orign args
rt_ret = rtMemcpy(args_, args_size_, kernel_def.args().data(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -716,7 +716,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
rt_ret = rtMemcpy(static_cast<char *>(args_) + offset, args_size_ - offset, tensor_device_addrs.data(),
kAddrLen * tensor_device_addrs.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
args_size_ - offset, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -724,7 +724,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(),
kAddrLen * tensor_device_addrs.size());
if (sec_ret != EOK) {
REPORT_CALL_ERROR("E19999", "Call memcpy_s fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call memcpy_s failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
args_size_ - offset, sec_ret, __FUNCTION__);
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
@@ -829,7 +829,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel

rtError_t rt_ret = rtMalloc(&custom_info_.attr_handle, op_attr_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -837,7 +837,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel

rt_ret = rtMemcpy(custom_info_.attr_handle, op_attr_size, buffer.GetData(), op_attr_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), op_attr_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -870,7 +870,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel

rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -879,7 +879,7 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(),
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
kernel_def.args_size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
@@ -947,7 +947,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
// args
rtError_t rt_ret = rtMalloc(&args_, kernel_def.args_size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
kernel_def.args_size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -957,7 +957,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
rt_ret = rtMemcpy(args_, kernel_def.args_size(), kernel_def.args().data(), kernel_def.args_size(),
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%u, ret:0x%X, when KernelTaskInfo %s",
kernel_def.args_size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -967,7 +967,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {
if (!sm_desc.empty()) {
rt_ret = rtMemAllocManaged(&sm_desc_, sm_desc.size(), RT_MEMORY_SPM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged fail, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemAllocManaged failed, ret:0x%X, when KernelTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -975,7 +975,7 @@ Status KernelTaskInfo::InitCceTask(const domi::KernelDef &kernel_def) {

rt_ret = rtMemcpy(sm_desc_, sm_desc.size(), sm_desc.data(), sm_desc.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sm_desc.size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1056,7 +1056,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api(rtMalloc) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1066,7 +1066,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
// copy args to device
rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%u, ret:0x%X, when KernelTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1140,7 +1140,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
}
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail for op:%s(%s), size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X, when KernelTaskInfo %s",
op_desc_->GetName().c_str(), op_desc_->GetType().c_str(),
ext_handle->GetExtInfoLen(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
@@ -1149,7 +1149,7 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_handle->GetExtInfoLen(), ext_handle->GetExtInfo(),
ext_handle->GetExtInfoLen(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail for op:%s(%s), size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%zu, ret:0x%X, when KernelTaskInfo %s",
op_desc_->GetName().c_str(), op_desc_->GetType().c_str(),
ext_handle->GetExtInfoLen(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
@@ -1169,7 +1169,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
// inputDescs
rtError_t rt_ret = rtMalloc(&custom_info_.input_descs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t) * input_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1179,7 +1179,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.input_descs) + i, sizeof(opTensor_t),
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1189,7 +1189,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
// inputAddrs
rt_ret = rtMalloc(&custom_info_.input_addrs, sizeof(opTensor_t) * input_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t) * input_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1199,7 +1199,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMemcpy(custom_info_.input_addrs, kAddrLen * input_size, &input_data_addrs[0], kAddrLen * input_size,
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
kAddrLen * input_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1209,7 +1209,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
// outputDescs
rt_ret = rtMalloc(&custom_info_.output_descs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t) * output_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1218,7 +1218,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMemcpy(static_cast<opTensor_t *>(custom_info_.output_descs) + i, sizeof(opTensor_t),
const_cast<tagOpTensor *>(&input_descs[i]), sizeof(opTensor_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1228,7 +1228,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
// outputAddrs
rt_ret = rtMalloc(&custom_info_.output_addrs, sizeof(opTensor_t) * output_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
sizeof(opTensor_t) * output_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1238,7 +1238,7 @@ Status KernelTaskInfo::StoreInputOutputTensor(const std::vector<void *> &input_d
rt_ret = rtMemcpy(custom_info_.output_addrs, kAddrLen * output_size, &output_data_addrs[0], kAddrLen * output_size,
RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
kAddrLen * output_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1292,7 +1292,7 @@ void KernelTaskInfo::FreeRtMem(void **ptr) {
}
rtError_t ret = rtFree(*ptr);
if (ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtFree fail, ret:0x%X, when KernelTaskInfo %s", ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtFree failed, ret:0x%X, when KernelTaskInfo %s", ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
}

@@ -1391,7 +1391,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe
if (context.is_flowtable()) {
rtError_t rt_ret = rtMalloc(&flowtable_, flowtable.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
flowtable.size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1400,7 +1400,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe

rt_ret = rtMemcpy(flowtable_, flowtable.size(), flowtable.data(), flowtable.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%zu, ret:0x%X, when KernelTaskInfo %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%zu, ret:0x%X, when KernelTaskInfo %s",
flowtable.size(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);


+ 12
- 0
ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc View File

@@ -38,12 +38,17 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when LabelGotoExTaskInfo %s",
label_goto.op_index(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_goto.op_index());
return INTERNAL_ERROR;
}

uint32_t label_index = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when LabelGotoExTaskInfo %s",
ATTR_NAME_LABEL_SWITCH_INDEX.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelGotoExTaskInfo: %s attr [%s] not exist.",
op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str());
return INTERNAL_ERROR;
@@ -56,6 +61,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da

rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
if (rt_ret != RT_ERROR_NONE) {
// Report the failed rtMalloc together with the op name/type and the requested size.
// sizeof() yields size_t, so the size is formatted with %zu.
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X, when LabelGotoExTaskInfo %s",
                  op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -63,6 +70,8 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
uint64_t branch_index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
// Report the failed rtMemcpy together with the op name/type and the copy size.
// sizeof() yields size_t, so the size is formatted with %zu.
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%zu, ret:0x%X, when LabelGotoExTaskInfo %s",
                  op_desc->GetName().c_str(), op_desc->GetType().c_str(), sizeof(uint64_t), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -76,12 +85,15 @@ Status LabelGotoExTaskInfo::Distribute() {
GE_CHECK_NOTNULL(args_);
GE_CHECK_NOTNULL(index_value_);
if (args_size_ == 0) {
REPORT_INNER_ERROR("E19999", "Param args_size_ is 0, check fail when LabelGotoExTaskInfo %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_);
return PARAM_INVALID;
}

rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X, when LabelGotoExTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 10
- 0
ge/graph/load/model_manager/task_info/label_set_task_info.cc View File

@@ -32,12 +32,17 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
const domi::LabelSetDef &label_set = task_def.label_set();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_set.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when LabelSetTaskInfo %s",
label_set.op_index(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_set.op_index());
return INTERNAL_ERROR;
}

uint32_t label_index = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_LABEL_SWITCH_INDEX, label_index)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when LabelSetTaskInfo %s",
ATTR_NAME_LABEL_SWITCH_INDEX.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: %s attr [%s] not exist.",
op_desc->GetName().c_str(), ATTR_NAME_LABEL_SWITCH_INDEX.c_str());
return INTERNAL_ERROR;
@@ -45,6 +50,9 @@ Status LabelSetTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin

const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
if (label_index >= label_list.size()) {
// Report a label index that falls outside the model's label list.
REPORT_INNER_ERROR("E19999", "label_index:%u >= label_list.size():%zu in model, op:%s(%s), "
                   "check invalid when LabelSetTaskInfo %s", label_index, label_list.size(),
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSetTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size());
return INTERNAL_ERROR;
}
@@ -58,6 +66,8 @@ Status LabelSetTaskInfo::Distribute() {
GELOGI("LabelSetTaskInfo Distribute Start.");
rtError_t rt_ret = rtLabelSet(label_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtLabelSet failed, ret:0x%X, when LabelSetTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 27
- 0
ge/graph/load/model_manager/task_info/label_switch_by_index_task_info.cc View File

@@ -39,6 +39,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when LabelSwitchByIndexTaskInfo %s",
label_switch.op_index(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range!", label_switch.op_index());
return INTERNAL_ERROR;
}
@@ -47,6 +49,9 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo

auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc);
if (input_data_addr.size() != kLabelSwitchIndexNum) {
REPORT_INNER_ERROR("E19999", "input_data_addr size:%zu != kLabelSwitchIndexNum:%u, op:%s(%s), "
"check invalid when LabelSwitchByIndexTaskInfo %s", input_data_addr.size(), kLabelSwitchIndexNum,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s invalid addr size: %zu, num: %u!",
op_desc->GetName().c_str(), input_data_addr.size(), kLabelSwitchIndexNum);
return INTERNAL_ERROR;
@@ -62,12 +67,19 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo

vector<uint32_t> label_idx_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when LabelSwitchByIndexTaskInfo %s",
ATTR_NAME_LABEL_SWITCH_LIST.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(),
ATTR_NAME_LABEL_SWITCH_LIST.c_str());
return INTERNAL_ERROR;
}

if (label_idx_list.empty() || label_idx_list.size() != branch_max_) {
// Report an empty label index list or one whose size differs from branch_max_.
// Adjacent string literals are concatenated verbatim, so the first part ends with ", "
// to keep the printed branch_max_ value separate from "check invalid".
REPORT_INNER_ERROR("E19999", "label_idx_list in op:%s(%s) is empty, or size:%zu != branch_max_:%u, "
                   "check invalid when LabelSwitchByIndexTaskInfo %s",
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(),
                   label_idx_list.size(), branch_max_, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s label index size: %zu, task branch max: %u.",
op_desc->GetName().c_str(), label_idx_list.size(), branch_max_);
return INTERNAL_ERROR;
@@ -78,6 +90,9 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
for (size_t idx = 0; idx < label_idx_list.size(); ++idx) {
uint32_t label_id = label_idx_list[idx];
if (label_id >= label_list.size()) {
// Report a label id that falls outside the model's label list.
// Adjacent string literals are concatenated verbatim, so the first part ends with ", "
// to keep "in model" separate from "check invalid".
REPORT_INNER_ERROR("E19999", "label_id:%u in op:%s(%s) >= label_list.size():%zu in model, "
                   "check invalid when LabelSwitchByIndexTaskInfo %s", label_id,
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(), label_list.size(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s index: %zu, label index: %u, model label size: %zu.",
op_desc->GetName().c_str(), idx, label_id, label_list.size());
return INTERNAL_ERROR;
@@ -91,12 +106,17 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
args_size_ = branch_max_ * sizeof(rtLabelDevInfo);
rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%u, ret:0x%X, "
"when LabelSwitchByIndexTaskInfo %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), args_size_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtLabelListCpy failed, ret:0x%X, when LabelSwitchByIndexTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -110,12 +130,16 @@ Status LabelSwitchByIndexTaskInfo::Distribute() {
GE_CHECK_NOTNULL(args_);
GE_CHECK_NOTNULL(index_value_);
if (branch_max_ == 0 || args_size_ == 0) {
// Report a zero branch_max_ or args_size_ before the label switch is issued.
// Adjacent string literals are concatenated verbatim, so the first part ends with ", "
// to keep "is 0" separate from "check invalid".
REPORT_INNER_ERROR("E19999", "branch_max_:%u or args_size_:%u is 0, "
                   "check invalid when LabelSwitchByIndexTaskInfo %s", branch_max_, args_size_, __FUNCTION__);
GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", branch_max_, args_size_);
return PARAM_INVALID;
}

rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtLabelSwitchByIndex failed, ret:0x%X, when LabelSwitchByIndexTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -133,6 +157,9 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def,
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != kLabelSwitchIndexNum) {
// Report an op whose input count differs from kLabelSwitchIndexNum.
// The expected count is printed as well, and the first literal ends with ", " because
// adjacent string literals are concatenated verbatim.
REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != kLabelSwitchIndexNum:%u, "
                   "check invalid when LabelSwitchByIndexTaskInfo %s", op_desc->GetInputsSize(),
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(), kLabelSwitchIndexNum, __FUNCTION__);
GELOGE(FAILED, "Label switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}


+ 10
- 0
ge/graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc View File

@@ -36,6 +36,8 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel
const auto &memcpy_async = task_def.memcpy_async();
OpDescPtr op_desc = davinci_model->GetOpByIndex(memcpy_async.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when MemcpyAddrAsyncTaskInfo %s",
memcpy_async.op_index(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index());
return INTERNAL_ERROR;
}
@@ -61,6 +63,9 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel
GELOGI("memory_type: %u", memory_type);
rtError_t rt_ret = rtMalloc(&args_, args_size + kAlignBytes, memory_type);
if (rt_ret != RT_ERROR_NONE) {
// Report the failed rtMalloc together with the op name/type and the requested size.
// NOTE(review): args_size is printed with %zu in the rtMemcpy report of this function, so the
// sum is formatted with %zu too; this assumes kAlignBytes does not widen it beyond size_t - confirm.
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed for op:%s(%s), size:%zu, ret:0x%X, "
                  "when MemcpyAddrAsyncTaskInfo %s", op_desc->GetName().c_str(), op_desc->GetType().c_str(),
                  args_size + kAlignBytes, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -71,6 +76,9 @@ Status MemcpyAddrAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel
static_cast<uint8_t *>(args_align_) + args_size, dst_, io_addrs.size());
rt_ret = rtMemcpy(args_align_, args_size, io_addrs.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed for op:%s(%s), size:%zu, ret:0x%X, "
"when MemcpyAddrAsyncTaskInfo %s", op_desc->GetName().c_str(), op_desc->GetType().c_str(),
args_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api for src failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -91,6 +99,8 @@ Status MemcpyAddrAsyncTaskInfo::Distribute() {
rtError_t rt_ret = rtMemcpyAsync(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(args_align_) + sizeof(void *)),
dst_max_, args_align_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X, when MemcpyAddrAsyncTaskInfo %s",
dst_max_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 4
- 0
ge/graph/load/model_manager/task_info/memcpy_async_task_info.cc View File

@@ -36,6 +36,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
dst_max_ = memcpy_async.dst_max();
OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when MemcpyAsyncTaskInfo %s",
memcpy_async.op_index(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async.op_index());
return INTERNAL_ERROR;
}
@@ -86,6 +88,8 @@ Status MemcpyAsyncTaskInfo::Distribute() {

rtError_t rt_ret = rtMemcpyAsync(dst_, dst_max_, src_, count_, static_cast<rtMemcpyKind_t>(kind_), stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync failed, size:%lu, ret:0x%X, when MemcpyAsyncTaskInfo %s",
dst_max_, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 3
- 0
ge/graph/load/model_manager/task_info/model_exit_task_info.cc View File

@@ -24,6 +24,7 @@ namespace ge {
Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("InitModelExitTaskInfo Init Start.");
if (davinci_model == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr when ModelExitTaskInfo %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID;
}
@@ -43,6 +44,8 @@ Status ModelExitTaskInfo::Distribute() {
GELOGI("ModelExitTaskInfo Distribute Start.");
rtError_t rt_ret = rtModelExit(model_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtModelExit failed, ret:0x%X, when ModelExitTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rtModelExit failed, ret: 0x%x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 3
- 0
ge/graph/load/model_manager/task_info/profiler_trace_task_info.cc View File

@@ -23,6 +23,7 @@ namespace ge {
Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("ProfilerTraceTaskInfo Init Start.");
if (davinci_model == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr when ProfilerTraceTaskInfo %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID;
}
@@ -46,6 +47,8 @@ Status ProfilerTraceTaskInfo::Distribute() {

rtError_t rt_ret = rtProfilerTrace(log_id_, notify_, flat_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtProfilerTrace failed, ret:0x%X, when ProfilerTraceTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 13
- 0
ge/graph/load/model_manager/task_info/stream_active_task_info.cc View File

@@ -26,6 +26,7 @@ namespace ge {
Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("StreamActiveTaskInfo Init Start.");
if (davinci_model == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr when StreamActiveTaskInfo %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID;
}
@@ -45,17 +46,27 @@ Status StreamActiveTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d
GE_CHECK_NOTNULL(op_desc);
std::vector<uint32_t> active_stream_index_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_index_list)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamActiveTaskInfo %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "StreamActiveOp get attr ACTIVE_STREAM fail, node name:%s.", op_desc->GetName().c_str());
return INTERNAL_ERROR;
}

if (internal_index >= active_stream_index_list.size()) {
REPORT_INNER_ERROR("E19999", "flowctrl index:%u >= active_stream_list size:%zu in op:%s(%s), "
"check invalid when StreamActiveTaskInfo %s", internal_index, active_stream_index_list.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream id index invalid. index:%u, list size:%zu.", internal_index,
active_stream_index_list.size());
return INTERNAL_ERROR;
}

if (active_stream_index_list[internal_index] >= davinci_model->GetStreamList().size()) {
REPORT_INNER_ERROR("E19999", "active_stream_index:%u in op:%s(%s) >= stream size:%zu in model, "
"check invalid when StreamActiveTaskInfo %s", active_stream_index_list[internal_index],
op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size(),
__FUNCTION__);
GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream index invalid. index:%u, stream list size:%zu.",
active_stream_index_list[internal_index], davinci_model->GetStreamList().size());
return INTERNAL_ERROR;
@@ -73,6 +84,8 @@ Status StreamActiveTaskInfo::Distribute() {
GELOGI("StreamActiveTaskInfo Distribute Start.");
rtError_t rt_ret = rtStreamActive(active_stream_, stream_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamActive failed, ret:0x%X, when StreamActiveTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}


+ 25
- 0
ge/graph/load/model_manager/task_info/stream_switch_task_info.cc View File

@@ -31,6 +31,7 @@ const uint32_t kTrueBranchStreamNum = 1;
Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("StreamSwitchTaskInfo Init Start.");
if (davinci_model == nullptr) {
REPORT_INNER_ERROR("E19999", "Check param davinci_model nullptr when StreamSwitchTaskInfo %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "davinci_model is null!");
return PARAM_INVALID;
}
@@ -49,6 +50,9 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d
SetInputAndValuePtr(davinci_model, input_data_addr);
uint32_t cond = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_STREAM_SWITCH_COND, cond)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamSwitchTaskInfo %s",
ATTR_NAME_STREAM_SWITCH_COND.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr STREAM_SWITCH_COND fail.");
return INTERNAL_ERROR;
}
@@ -56,6 +60,9 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d

size_t input_size = op_desc->GetInputsSize();
if (input_data_addr.size() != STREAM_SWITCH_INPUT_NUM || input_size != STREAM_SWITCH_INPUT_NUM) {
REPORT_INNER_ERROR("E19999", "input_data_addr.size():%zu or input size:%zu != STREAM_SWITCH_INPUT_NUM:%u "
"in op:%s(%s), check invalid when StreamSwitchTaskInfo %s", input_data_addr.size(), input_size,
STREAM_SWITCH_INPUT_NUM, op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Input num should be %u. inputAddr size:%zu, inputDesc size:%zu.",
STREAM_SWITCH_INPUT_NUM, input_data_addr.size(), input_size);
return INTERNAL_ERROR;
@@ -63,17 +70,27 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d

vector<uint32_t> active_stream_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, active_stream_list)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamSwitchTaskInfo %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "StreamSwitchOp get attr ACTIVE_STREAM_LIST fail.");
return INTERNAL_ERROR;
}

if (active_stream_list.size() != kTrueBranchStreamNum) {
REPORT_INNER_ERROR("E19999", "active_stream_list.size():%zu in op:%s(%s) != kTrueBranchStreamNum:%u, "
"check invalid when StreamSwitchTaskInfo %s", active_stream_list.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), kTrueBranchStreamNum, __FUNCTION__);
GELOGE(FAILED, "Stream num of switch true branch must be %u.", kTrueBranchStreamNum);
return FAILED;
}

size_t true_stream_index = active_stream_list.front();
if (true_stream_index >= davinci_model->GetStreamList().size()) {
REPORT_INNER_ERROR("E19999", "active_stream_index:%zu in op:%s(%s) >= stream list size:%zu in model,"
"check invalid when StreamSwitchTaskInfo %s", true_stream_index,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size(),
__FUNCTION__);
GELOGE(INTERNAL_ERROR, "InitStreamSwitchTaskInfo stream index invalid. index:%zu, stream list size:%zu.",
true_stream_index, davinci_model->GetStreamList().size());
return INTERNAL_ERROR;
@@ -87,6 +104,9 @@ Status StreamSwitchTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *d
if (op_desc->HasAttr(ATTR_NAME_SWITCH_DATA_TYPE)) {
int64_t data_type = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_SWITCH_DATA_TYPE, data_type)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamSwitchTaskInfo %s",
ATTR_NAME_SWITCH_DATA_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "StreamSwitchOp[node:%s] get attr SWITCH_DATA_TYPE fail.", op_desc->GetName().c_str());
return FAILED;
}
@@ -103,6 +123,8 @@ Status StreamSwitchTaskInfo::Distribute() {
GELOGI("StreamSwitchTaskInfo Distribute Start.");
rtError_t rt_ret = rtStreamSwitchEx(input_ptr_, cond_, value_ptr_, true_stream_, stream_, data_type_);
if (rt_ret != RT_ERROR_NONE) {
// Report the failed rtStreamSwitchEx call with its runtime return code.
REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchEx failed, ret:0x%X, when StreamSwitchTaskInfo %s",
                  rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -119,6 +141,9 @@ Status StreamSwitchTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davinc
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != STREAM_SWITCH_INPUT_NUM) {
REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != STREAM_SWITCH_INPUT_NUM:%u,"
"check invalid when StreamSwitchTaskInfo %s", op_desc->GetInputsSize(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), STREAM_SWITCH_INPUT_NUM, __FUNCTION__);
GELOGE(FAILED, "Stream switch op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}


+ 33
- 0
ge/graph/load/model_manager/task_info/stream_switchn_task_info.cc View File

@@ -36,6 +36,8 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *
auto stream_switchn_def = task_def.stream_switch_n();
OpDescPtr op_desc = davinci_model->GetOpByIndex(stream_switchn_def.op_index());
if (op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "Can't get op_desc from davinci_model by index:%u when StreamSwitchNTaskInfo %s",
stream_switchn_def.op_index(), __FUNCTION__);
GELOGE(FAILED, "Index is out of range, index: %u", stream_switchn_def.op_index());
return FAILED;
}
@@ -46,6 +48,9 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *
// set value_ptr_
auto value = stream_switchn_def.target_value();
if (value.size() == 0) {
REPORT_INNER_ERROR("E19999", "task_Def.stream_switch_n.target_value:%d in op:%s(%s) is 0,"
"check invalid when StreamSwitchNTaskInfo %s", value.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "The number of gears in dynamic batch scenario can not be 0.");
return FAILED;
}
@@ -57,6 +62,9 @@ Status StreamSwitchNTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *

// set element_size_
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_BATCH_NUM, element_size_)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamSwitchNTaskInfo %s",
ATTR_NAME_BATCH_NUM.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "Get ATTR_NAME_BATCH_NUM of switchN op failed.");
return FAILED;
}
@@ -84,6 +92,8 @@ Status StreamSwitchNTaskInfo::Distribute() {
rtError_t rt_ret =
rtStreamSwitchN(input_ptr_, input_size_, value_ptr_, true_stream_ptr_, element_size_, stream_, data_type_);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtStreamSwitchN failed, ret:0x%X, when InitStreamSwitchNTaskInfo %s",
rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@@ -96,11 +106,18 @@ Status StreamSwitchNTaskInfo::Distribute() {
Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, DavinciModel *davinci_model) {
vector<uint32_t> true_stream_id_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_ACTIVE_STREAM_LIST, true_stream_id_list)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when StreamSwitchNTaskInfo %s",
ATTR_NAME_ACTIVE_STREAM_LIST.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "StreamSwitchNOp get attr ACTIVE_STREAM_LIST fail.");
return FAILED;
}

if (true_stream_id_list.size() > davinci_model->GetStreamList().size()) {
// Report an active stream list that is larger than the model's stream list.
// The guarding condition is a strict '>', so the message states '>' as well.
REPORT_INNER_ERROR("E19999", "active_stream_list.size:%zu in op:%s(%s) > stream list size:%zu in model, "
                   "check invalid when StreamSwitchNTaskInfo %s", true_stream_id_list.size(),
                   op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size(),
                   __FUNCTION__);
GELOGE(FAILED,
"InitStreamSwitchNTaskInfo get true stream id list failed. true stream size:%zu, "
"stream list size:%zu.",
@@ -112,6 +129,10 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci
for (size_t i = 0; i < true_stream_id_list.size(); ++i) {
uint32_t true_stream_id = true_stream_id_list[i];
if (true_stream_id >= davinci_model->GetStreamList().size()) {
REPORT_INNER_ERROR("E19999", "active_stream_id:%u in op:%s(%s) >= stream list size:%zu in model,"
"check invalid when StreamSwitchNTaskInfo %s", true_stream_id,
op_desc->GetName().c_str(), op_desc->GetType().c_str(), davinci_model->GetStreamList().size(),
__FUNCTION__);
GELOGE(FAILED, "InitStreamSwitchNTaskInfo stream id invalid. id:%u, stream list size:%zu.", true_stream_id,
davinci_model->GetStreamList().size());
return FAILED;
@@ -122,6 +143,9 @@ Status StreamSwitchNTaskInfo::GetTrueStreamPtr(const OpDescPtr &op_desc, Davinci
}

if (true_stream_list_.empty()) {
REPORT_INNER_ERROR("E19999", "active_stream_list.size():%zu in op:%s(%s) is empty, "
"check invalid when StreamSwitchNTaskInfo %s", true_stream_id_list.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "true stream list is null.");
return FAILED;
}
@@ -138,6 +162,9 @@ Status StreamSwitchNTaskInfo::CalculateArgs(const domi::TaskDef &task_def, Davin
GE_CHECK_NOTNULL(op_desc);
GELOGI("Calc opType[%s] args size. Node name is [%s]", op_desc->GetType().c_str(), op_desc->GetName().c_str());
if (op_desc->GetInputsSize() != kStreamSwitchnInputNum) {
REPORT_INNER_ERROR("E19999", "input size:%zu in op:%s(%s) != kStreamSwitchnInputNum:%u ,"
"check invalid when StreamSwitchNTaskInfo %s", op_desc->GetInputsSize(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), kStreamSwitchnInputNum, __FUNCTION__);
GELOGE(FAILED, "Stream switchn op only have one data input. Now input size is %zu", op_desc->GetInputsSize());
return FAILED;
}
@@ -159,6 +186,9 @@ Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciMo
const vector<int64_t> input_offset = op_desc->GetInputOffset();
const vector<int64_t> input_legnth = ModelUtils::GetInputSize(op_desc);
if (input_offset.empty() || input_legnth.empty()) {
REPORT_INNER_ERROR("E19999", "input_offset size:%zu or input_length.size:%zu in op:%s(%s) is empty,"
"check invalid when StreamSwitchNTaskInfo %s", input_offset.size(), input_legnth.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "input offset size %zu, input legnth size: %zu", input_offset.size(), input_legnth.size());
return FAILED;
}
@@ -170,6 +200,9 @@ Status StreamSwitchNTaskInfo::InputPtrUpdate(const OpDescPtr &op_desc, DavinciMo
} else {
auto input_data_addr = ModelUtils::GetInputDataAddrs(davinci_model->GetRuntimeParam(), op_desc);
if (input_data_addr.empty()) {
REPORT_INNER_ERROR("E19999", "input_data_addr size:%zu in op:%s(%s) is empty,"
"check invalid when StreamSwitchNTaskInfo %s", input_data_addr.size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "input data addr is empty");
return FAILED;
}


+ 3
- 3
ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc View File

@@ -27,21 +27,21 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) {

rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X when %s",
sizeof(args), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), reinterpret_cast<void *>(args),
sizeof(args), RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%lu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X when %s",
sizeof(args), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream,
dump_flag);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag fail, dump_flag:%u, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtKernelLaunchWithFlag failed, dump_flag:%u, ret:0x%X when %s",
dump_flag, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtKernelLaunchWithFlag failied. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)


+ 5
- 5
ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc View File

@@ -36,14 +36,14 @@ Status SuperKernelFactory::Init() {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(this->sk_stub_name_.c_str(), &this->func_stub_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName fail, stub_func:%s, ret:0x%X, when %s",
REPORT_CALL_ERROR("E19999", "Call rtGetFunctionByName failed, stub_func:%s, ret:0x%X, when %s",
this->sk_stub_name_.c_str(), rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtGetFunctionByName failed. stub_func: %s, please export LD_LIBRARY_PATH for "
"libcce_aicore.so", this->sk_stub_name_.c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtGetAddrByFun(this->func_stub_, &this->func_ptr_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun fail, ret:0x%X, when %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun failed, ret:0x%X, when %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
GELOGD(
@@ -101,7 +101,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
void *sub_device_func = nullptr;
rt_ret = rtGetAddrByFun(stub_func_list[i], &sub_device_func);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun fail, ret:0x%X, when %s", rt_ret, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Call rtGetAddrByFun failed, ret:0x%X, when %s", rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtGetAddrByFun failed. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func);
@@ -114,14 +114,14 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
}
rt_ret = rtMalloc(reinterpret_cast<void **>(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMalloc fail, size:%lu, ret:0x%X, when %s",
REPORT_CALL_ERROR("E19999", "Call rtMalloc failed, size:%lu, ret:0x%X, when %s",
nav_table_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret = rtMemcpy(reinterpret_cast<void *>(hbm_nav_table_addr), nav_table_size,
reinterpret_cast<void *>(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
REPORT_CALL_ERROR("E19999", "Call rtMemcpy fail, size:%lu, ret:0x%X when %s",
REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%lu, ret:0x%X when %s",
nav_table_size, rt_ret, __FUNCTION__);
GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)


+ 2
- 0
ge/graph/load/model_manager/task_info/task_info.cc View File

@@ -25,6 +25,8 @@ Status TaskInfo::SetStream(uint32_t stream_id, const std::vector<rtStream_t> &st
} else if (stream_list.size() > stream_id) {
stream_ = stream_list[stream_id];
} else {
REPORT_INNER_ERROR("E19999", "stream_id:%u >= stream_list.size(): %zu, check invalid when TaskInfo %s",
stream_id, stream_list.size(), __FUNCTION__);
GELOGE(FAILED, "index: %u >= stream_list.size(): %zu.", stream_id, stream_list.size());
return FAILED;
}


+ 8
- 0
ge/graph/load/model_manager/tbe_handle_store.cc View File

@@ -23,6 +23,7 @@
namespace ge {
void TbeHandleInfo::used_inc(uint32_t num) {
if (used_ > std::numeric_limits<uint32_t>::max() - num) {
REPORT_INNER_ERROR("E19999", "Used:%u reach numeric max when TbeHandleInfo %s", used_, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Used[%u] reach numeric max.", used_);
return;
}
@@ -32,6 +33,7 @@ void TbeHandleInfo::used_inc(uint32_t num) {

void TbeHandleInfo::used_dec(uint32_t num) {
if (used_ < std::numeric_limits<uint32_t>::min() + num) {
REPORT_INNER_ERROR("E19999", "Used:%u reach numeric min when TbeHandleInfo %s", used_, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Used[%u] reach numeric min.", used_);
return;
}
@@ -105,6 +107,8 @@ void TBEHandleStore::ReferTBEHandle(const std::string &name) {
std::lock_guard<std::mutex> lock(mutex_);
auto it = kernels_.find(name);
if (it == kernels_.end()) {
REPORT_INNER_ERROR("E19999", "Kernel:%s not found in stored check invalid when TbeHandleInfo %s",
name.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Kernel[%s] not found in stored.", name.c_str());
return;
}
@@ -124,6 +128,8 @@ void TBEHandleStore::EraseTBEHandle(const std::map<std::string, uint32_t> &names
for (auto &item : names) {
auto it = kernels_.find(item.first);
if (it == kernels_.end()) {
REPORT_INNER_ERROR("E19999", "Kernel:%s not found in stored check invalid when TbeHandleInfo %s",
item.first.c_str(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Kernel[%s] not found in stored.", item.first.c_str());
continue;
}
@@ -134,6 +140,8 @@ void TBEHandleStore::EraseTBEHandle(const std::map<std::string, uint32_t> &names
} else {
rtError_t rt_ret = rtDevBinaryUnRegister(info.handle());
if (rt_ret != RT_ERROR_NONE) {
REPORT_INNER_ERROR("E19999", "Call rtDevBinaryUnRegister failed for Kernel:%s fail, ret:0x%X, "
"when TbeHandleInfo %s", item.first.c_str(), rt_ret, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Kernel[%s] UnRegister handle fail:%u.", item.first.c_str(), rt_ret);
}
kernels_.erase(it);


+ 2
- 0
ge/graph/load/model_manager/zero_copy_offset.cc View File

@@ -76,6 +76,8 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list
auto tensor_desc = op_desc->GetInputDescPtr(idx);
GE_CHECK_NOTNULL(tensor_desc);
if (TensorUtils::GetTensorSizeInBytes(*tensor_desc, size) != GRAPH_SUCCESS) {
REPORT_INNER_ERROR("E19999", "Get input TensorSize in op:%s(%s) failed, input_index:%zu, when ZeroCopyOffset %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), idx, __FUNCTION__);
GELOGE(FAILED, "GetTensorSizeInBytes failed!");
return FAILED;
}


+ 4
- 0
ge/graph/load/model_manager/zero_copy_task.cc View File

@@ -36,6 +36,8 @@ ZeroCopyTask::~ZeroCopyTask() { args_addr_ = nullptr; }
*/
Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) {
if (offset + sizeof(uintptr_t) > args_size_) {
REPORT_INNER_ERROR("E19999", "Param offset:%zu + 8 > args_size_:%zu, check invalid when ZeroCopyOffset %s",
offset, args_size_, __FUNCTION__);
GELOGE(FAILED, "[ZCPY] %s set task args failed, args size: %zu, offset: %zu", name_.c_str(), args_size_, offset);
return FAILED; // unexpected error, need fix.
}
@@ -116,6 +118,8 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) {
}

if (rt_err != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtMemcpyAsync or rtMemcpy failed, size:%zu, ret: 0x%X when ZeroCopyTask %s",
args_size_, rt_err, __FUNCTION__);
GELOGE(RT_FAILED, "[ZCPY] %s distribute task param failed, error=0x%x", name_.c_str(), rt_err);
return RT_ERROR_TO_GE_STATUS(rt_err);
}


+ 11
- 0
ge/graph/manager/graph_context.cc View File

@@ -44,6 +44,7 @@ GraphContext::GraphContext(const GraphNodePtr &graph_node) {

Status GraphContext::SetComputeGraph(const GraphNodePtr &graph_node) {
if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Param graph_node is nullptr, check invalid when GraphContext %s", __FUNCTION__);
GELOGE(GE_GRAPH_PARAM_NULLPTR, "graphNode is NULL!");
return GE_GRAPH_PARAM_NULLPTR;
}
@@ -54,6 +55,8 @@ Status GraphContext::SetComputeGraph(const GraphNodePtr &graph_node) {
if (compute_graph_ == nullptr) {
std::shared_ptr<const ge::Graph> graph = graph_node->GetGraph();
if (graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Param graph in graph_node is nullptr, check invalid when GraphContext %s",
__FUNCTION__);
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "compute_graph by graphNode is NULL!");
return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
}
@@ -70,11 +73,15 @@ Status GraphContext::Finalize() const { return SUCCESS; }

Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTensor &returned_tensor) {
if (var_data_name.empty()) {
REPORT_INNER_ERROR("E19999", "Param var_data_name is empty, check invalid when GraphContext %s",
__FUNCTION__);
GELOGE(GE_GRAPH_EMPTY_STRING_NAME, "Variable data name is empty!");
return GE_GRAPH_EMPTY_STRING_NAME;
}

if (GetVarNodeTensorTable().empty()) {
REPORT_INNER_ERROR("E19999", "VarNodeTensorTable is empty, var_data_name:%s, check invalid when GraphContext %s",
var_data_name.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE, "VarNodeTensorTable is empty!");
return GE_GRAPH_EMPTY_VARIABLE_TENSOR_TABLE;
}
@@ -83,6 +90,8 @@ Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTenso
returned_tensor.SetTensorDesc(var_record.second.GetTensorDesc());
auto ret = returned_tensor.SetData(var_record.second.GetData());
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "SetData to tensor fail, var_data_name:%s, when GraphContext %s",
var_data_name.c_str(), __FUNCTION__);
GELOGE(ret, "Set Tensor data failed!");
return ret;
}
@@ -91,6 +100,8 @@ Status GraphContext::GetVariableTensor(const std::string &var_data_name, GeTenso
}
}

REPORT_INNER_ERROR("E19999", "VarRecord with data_name:%s does not exist, check invalid when GraphContext %s",
var_data_name.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_VARIABLE_DOES_NOT_EXIST, "VarRecord with data_name %s does NOT exist!", var_data_name.c_str());

return GE_GRAPH_VARIABLE_DOES_NOT_EXIST;


+ 194
- 2
ge/graph/manager/graph_manager.cc View File

@@ -137,6 +137,7 @@ ge::Status CheckFpCeilingMode() {
auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode);
if (ret == ge::GRAPH_SUCCESS) {
if (kValidFpCeilingMode.count(mode) == 0) {
REPORT_INNER_ERROR("E19999", "Option ge.fpCeilingMode is invalid, value:%s, when %s", mode.c_str(), __FUNCTION__);
GELOGE(ge::GE_GRAPH_OPTIONS_INVALID, "The fp_ceiling_mode %s is invalid, options are 0, 1, and 2.", mode.c_str());
return ge::GE_GRAPH_OPTIONS_INVALID;
}
@@ -165,12 +166,14 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {
// malloc
graph_run_listener_ = MakeShared<GraphModelListener>(sync_run_mutex_, condition_);
if (graph_run_listener_ == nullptr) {
REPORT_INNER_ERROR("E19999", "New GraphModelListener fail when GraphManager %s", __FUNCTION__);
GELOGE(MEMALLOC_FAILED, "Make shared failed");
return MEMALLOC_FAILED;
}
// graph context
graph_context_ = MakeShared<GraphContext>();
if (graph_context_ == nullptr) {
REPORT_INNER_ERROR("E19999", "New GraphModelListener fail when GraphManager %s", __FUNCTION__);
GELOGE(MEMALLOC_FAILED, "Make shared failed.");
return MEMALLOC_FAILED;
}
@@ -292,6 +295,8 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) {
std::string op_type;
auto ret = GetOriginalType(node, op_type);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "GetOriginalType from op:%s fail when GraphManager %s",
node->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "Failed to get node %s original type.", node->GetName().c_str());
return FAILED;
}
@@ -322,6 +327,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
const std::map<std::string, std::string> &options,
const OmgContext &omg_context) {
if (HasGraphNode(graph_id)) {
REPORT_INNER_ERROR("E19999", "graph_id:%u is exist, check invalid when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
}
@@ -332,6 +338,8 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
bool graph_has_been_added = false;
if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added)
&& graph_has_been_added) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail when GraphManager %s",
ATTR_NAME_GRAPH_HAS_BEEN_ADDED.c_str(), graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST,
"[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
@@ -339,6 +347,8 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
(void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true);
compute_graph_ = compute_graph;
} else {
REPORT_INNER_ERROR("E19999", "compute_graph from graph:%u is nullptr, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "compute graph is null");
return FAILED;
}
@@ -355,10 +365,16 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
}

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
GE_IF_BOOL_EXEC(graph_node == nullptr, GELOGE(FAILED, "GraphNode make shared failed");
GE_IF_BOOL_EXEC(graph_node == nullptr,
REPORT_INNER_ERROR("E19999", "New GraphNode fail, graph_id:%u, when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "GraphNode make shared failed");
return FAILED);
std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph);
GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed");
GE_IF_BOOL_EXEC(graph_ptr == nullptr,
REPORT_INNER_ERROR("E19999", "New Graph fail, graph_id:%u, when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED);
// update option about tuning graph
ParseOption(options, BUILD_MODE, options_.build_mode);
@@ -394,6 +410,7 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
const std::map<std::string, std::string> &options,
const OmgContext &omg_context) {
if (HasGraphNode(graph_id)) {
REPORT_INNER_ERROR("E19999", "graph_id:%u is exist, check invalid when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
}
@@ -403,11 +420,15 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
bool graph_has_been_added = false;
if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added)
&& graph_has_been_added) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail when GraphManager %s",
ATTR_NAME_GRAPH_HAS_BEEN_ADDED.c_str(), graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST,
"[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
}
} else {
REPORT_INNER_ERROR("E19999", "compute_graph from graph:%u is nullptr, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "compute graph is null");
return FAILED;
}
@@ -429,11 +450,15 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
if (graph_node == nullptr) {
REPORT_CALL_ERROR("E19999", "New GraphNode fail, graph_id:%u, when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "GraphNode make shared failed");
return FAILED;
}
std::shared_ptr<Graph> graph_ptr = GraphUtils::CreateGraphPtrFromComputeGraph(new_compute_graph);
if (graph_ptr == nullptr) {
REPORT_INNER_ERROR("E19999", "New Graph fail, graph_id:%u, when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED;
}
@@ -477,6 +502,8 @@ Status GraphManager::MergeSubGraph(ComputeGraphPtr &compute_graph, const ge::Com

Status ret_topo = compute_graph->TopologicalSorting();
if (ret_topo != SUCCESS) {
REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u, when GraphManager %s",
compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(ret_topo, "[GraphManager]: TopologicalSorting the merged graph failed.");
return ret_topo;
}
@@ -512,11 +539,15 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr
std::vector<NodePtr> output_nodes;
ComputeGraphPtr new_compute_graph = GraphUtils::CloneGraph(old_compute_graph, "", input_nodes, output_nodes);
if (new_compute_graph == nullptr) {
REPORT_CALL_ERROR("E19999", "CloneGraph fail, graph_id:%u, when GraphManager %s",
compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Clone graph failed.");
return INTERNAL_ERROR;
}
copy_graphs.emplace(old_compute_graph->GetName(), new_compute_graph);
if (!AttrUtils::SetBool(old_compute_graph, ATTR_NAME_NEED_LX_FUSION, true)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail when GraphManager %s",
ATTR_NAME_NEED_LX_FUSION.c_str(), old_compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "Set attr lx_fusion to graph failed.");
return INTERNAL_ERROR;
}
@@ -582,6 +613,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
for (size_t i = 0; i < vector_future.size(); ++i) {
Status ret_status = vector_future[i].get();
if (ret_status != SUCCESS) {
REPORT_CALL_ERROR("E19999", "subgraph %zu optimize failed, when GraphManager %s", i, __FUNCTION__);
GELOGE(ret_status, "subgraph %zu optimize failed", i);
return ret_status;
}
@@ -592,6 +624,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
bool GraphManager::CheckAllFusionOptimizeSuccess(const ComputeGraphPtr &compute_graph,
Graph2SubGraphInfoList &sub_graph_map) {
if (compute_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid when GraphManager %s", __FUNCTION__);
GELOGE(PARAM_INVALID, "Input param compute_graph is nullptr.");
return false;
}
@@ -631,6 +664,8 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_
for (const auto &subgraph : root_subgraph_list) {
auto iter = copy_graphs.find(subgraph->GetSubGraph()->GetName());
if (iter == copy_graphs.end()) {
REPORT_INNER_ERROR("E19999", "Can not find subgraph:%s in copy graphs, check invalid when GraphManager %s",
subgraph->GetSubGraph()->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "Can not find subgraph:%s in copy graphs.", subgraph->GetSubGraph()->GetName().c_str());
return FAILED;
}
@@ -642,6 +677,8 @@ Status GraphManager::ReplaceSubgraphWithOriGraph(const ComputeGraphPtr &compute_
for (const auto &subgraph : subgraph_list) {
auto iter = copy_graphs.find(subgraph->GetSubGraph()->GetName());
if (iter == copy_graphs.end()) {
REPORT_INNER_ERROR("E19999", "Can not find subgraph:%s in copy graphs, check invalid when GraphManager %s",
subgraph->GetSubGraph()->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "Can not find subgraph:%s in copy graphs.", subgraph->GetSubGraph()->GetName().c_str());
return FAILED;
}
@@ -740,6 +777,8 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,

Status ret = compute_graph->TopologicalSorting();
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "TopologicalSorting fail, graph_id:%u, when GraphManager %s",
compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(ret, "Graph topological sort failed, ret:%d.", ret);
return ret;
}
@@ -755,11 +794,15 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint

rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCtxCreate faileded, session_id:%lu, graph_id:%u, mode:%d, when GraphManager %s",
session_id, graph_id, mode, __FUNCTION__);
GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return FAILED;
}
rt_ret = rtCtxSetCurrent(rt_context);
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtCtxSetCurrent failed, session_id:%lu, graph_id:%u, mode:%d, when GraphManager %s",
session_id, graph_id, mode, __FUNCTION__);
GELOGE(FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return FAILED;
}
@@ -874,6 +917,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
if (IsGraphNeedBuild(graph_node)) {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
if (graph_node->GetBuildFlag()) {
REPORT_INNER_ERROR("E19999", "Graph:%u has not build before, can't run directly, "
"check invalid when GraphManager %s", graph_node->GetGraphId(), __FUNCTION__);
GELOGE(PARAM_INVALID,
"The graph %u need to re-build, you should remove it from GE "
"first, then AddGraph again and rebuild it.",
@@ -1075,16 +1120,22 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[RunGraph] graph not exist, graph_id = %u.", graph_id);
return ret;
}

if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_NODE_NULL;
}

if (graph_node->GetRunFlag()) {
REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph_id:%u, "
"check invalid when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_ALREADY_RUNNING, "[RunGraph] graph already running, graph id = %u", graph_id);
return GE_GRAPH_ALREADY_RUNNING;
}
@@ -1097,6 +1148,8 @@ Status GraphManager::RunGraph(const GraphId &graph_id, const std::vector<GeTenso

GE_IF_BOOL_EXEC(GetTrainFlag(),
GE_IF_BOOL_EXEC(compute_graph_tmp == nullptr,
REPORT_CALL_ERROR("E19999", "compute_graph is nullptr in graph_node, graph_id:%u, "
"check invalid when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL,
"[RunGraph] compute_graph_tmp is NULL, graph id = %u.", graph_id);
return GE_GRAPH_GRAPH_NODE_NULL;))
@@ -1154,11 +1207,15 @@ Status GraphManager::GenerateInfershapeGraph(GraphId &graph_id) {
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[BuildGraph] graph not exist, graph_id = %u.", graph_id);
return ret;
}

if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id);
return GE_GRAPH_GRAPH_NODE_NULL;
}
@@ -1181,11 +1238,15 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[BuildGraph] graph not exist, graph_id = %u.", graph_id);
return ret;
}

if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id);
return GE_GRAPH_GRAPH_NODE_NULL;
}
@@ -1206,6 +1267,8 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const

auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
REPORT_INNER_ERROR("E19999", "GELib is not init before, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized");
return GE_CLI_GE_NOT_INITIALIZED;
}
@@ -1213,12 +1276,19 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const
OpsKernelInfoStorePtr kernel_info =
instance_ptr->OpsKernelManagerObj().GetOpsKernelInfoStore(op_desc->GetOpKernelLibName());
if (kernel_info == nullptr) {
REPORT_INNER_ERROR("E19999", "GetOpsKernelInfoStore fail for op:%s(%s), kernel_lib_name:%s, graph_id:%u, "
"check invalid when GraphManager %s", op_desc->GetName().c_str(), op_desc->GetType().c_str(),
op_desc->GetOpKernelLibName().c_str(), graph_id, __FUNCTION__);
GELOGE(FAILED, "Get op kernel info store failed");
return FAILED;
}

ret = kernel_info->CompileOp(node_vec);
if (ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Call CompileOp fail for op:%s(%s), kernel_lib_name:%s, graph_id:%u, "
"check invalid when GraphManager %s", op_desc->GetName().c_str(), op_desc->GetType().c_str(),
op_desc->GetOpKernelLibName().c_str(), graph_id, __FUNCTION__);
GELOGE(FAILED, "Get op kernel info store failed");
GELOGE(ret, "Compile op failed, op = %s, graph_id = %u.", op_desc->GetName().c_str(), graph_id);
return ret;
}
@@ -1242,16 +1312,22 @@ Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTen
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[BuildGraph] graph not exist, graph_id = %u.", graph_id);
return ret;
}

if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[BuildGraph] graph node is NULL, graphId = %u.", graph_id);
return GE_GRAPH_GRAPH_NODE_NULL;
}

if (graph_node->GetRunFlag()) {
REPORT_INNER_ERROR("E19999", "Graph is already running, can't be run again, graph_id:%u, "
"check invalid when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_ALREADY_RUNNING, "[BuildGraph] graph already running, graph id = %u", graph_node->GetGraphId());
return GE_GRAPH_ALREADY_RUNNING;
}
@@ -1319,11 +1395,15 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id);
return GE_GRAPH_GRAPH_NOT_EXIST;
}

if ((graph_node == nullptr) || (graph_node->GetRunFlag())) {
REPORT_INNER_ERROR("E19999", "Graph:%u is running, can't be remove, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id);
return GE_GRAPH_GRAPH_IS_RUNNING;
}
@@ -1345,6 +1425,8 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
GELOGI("UnloadModel via new ome.");
rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, graph_id:%u, when GraphManager %s",
GetContext().DeviceId(), graph_id, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.",
all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
ret = FAILED;
@@ -1358,6 +1440,8 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset fail, device_id:%u, graph_id:%u, when GraphManager %s",
GetContext().DeviceId(), graph_id, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] unload model failed, modelId=%u, graphId=%u.",
all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
ret = FAILED;
@@ -1374,6 +1458,8 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
GELOGI("Unload model %u.", ge_root_model->GetModelId());
rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, graph_id:%u, when GraphManager %s",
GetContext().DeviceId(), graph_id, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
graph_id);
return FAILED;
@@ -1386,6 +1472,8 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s",
GetContext().DeviceId(), graph_id, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
graph_id);
ret = FAILED;
@@ -1572,6 +1660,8 @@ Status GraphManager::ParseOption(const std::map<std::string, std::string> &optio
} else if (flag == "1") {
option = true;
} else {
REPORT_INNER_ERROR("E19999", "Option:%s value:%s must be 0 or 1, check invalid when GraphManager %s",
key.c_str(), flag.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, it must be 0 or 1.", key.c_str(),
flag.c_str());
return GE_GRAPH_OPTIONS_INVALID;
@@ -1588,6 +1678,8 @@ Status GraphManager::ParseOption(const std::map<std::string, std::string> &optio
if (iter != options.end()) {
option = static_cast<int32_t>(std::strtol(iter->second.c_str(), &ptr, kDecimal));
if (ptr != nullptr && *ptr != '\0') {
REPORT_INNER_ERROR("E19999", "Option:%s value:%s must be int32_t type, check invalid when GraphManager %s",
key.c_str(), iter->second.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:%s, its value %s is invalid, must be int32_t type.", key.c_str(),
iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
@@ -1631,6 +1723,8 @@ Status GraphManager::ParseOption(const std::map<std::string, std::string> &optio
// split engine and num by :
size_t pos = engine_parallel.find(':');
if (pos == string::npos) {
REPORT_INNER_ERROR("E19999", "Option:%s, value:%s, engine and num must be connected by :, check invalid "
"when GraphManager %s", key.c_str(), engine_parallel.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"engine and num must be connected by :, "
"while your input is %s",
@@ -1664,6 +1758,8 @@ Status GraphManager::ParseOption(const std::map<std::string, std::string> &optio
Status GraphManager::CheckEngineName(const std::string &engine_name, const std::string &key,
const std::map<std::string, int> &option) {
if (engine_name.empty()) {
REPORT_INNER_ERROR("E19999", "Option:%s, param engine_name:%s is empty, check invalid when GraphManager %s",
key.c_str(), engine_name.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "engine name of %s is empty", key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
@@ -1674,6 +1770,8 @@ Status GraphManager::CheckEngineName(const std::string &engine_name, const std::

auto it_stream_repeat = option.find(engine_name);
if (it_stream_repeat != option.end()) {
REPORT_INNER_ERROR("E19999", "Option:%s, param engine_name:%s is repeated, check invalid when GraphManager %s",
key.c_str(), engine_name.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "engine : %s of %s is repeated", engine_name.c_str(), key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
@@ -1682,11 +1780,15 @@ Status GraphManager::CheckEngineName(const std::string &engine_name, const std::

Status GraphManager::ParseParallelNum(const std::string &parallel_num, const std::string &key, int &num) {
if (parallel_num.empty()) {
REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is empty, check invalid when GraphManager %s",
key.c_str(), parallel_num.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num of %s is empty", key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
for (char c : parallel_num) {
// isdigit() is only defined for values representable as unsigned char (or EOF);
// a plain char may be negative, so widen it through unsigned char first.
if (!isdigit(static_cast<unsigned char>(c))) {
  REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is not digit, check invalid when GraphManager %s",
                     key.c_str(), parallel_num.c_str(), __FUNCTION__);
  GELOGE(GE_GRAPH_OPTIONS_INVALID, "%s input is invalid ", key.c_str());
  return GE_GRAPH_OPTIONS_INVALID;
}
@@ -1695,17 +1797,25 @@ Status GraphManager::ParseParallelNum(const std::string &parallel_num, const std
try {
num = std::stoi(parallel_num);
} catch (std::invalid_argument &) {
REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is invalid argument, check when GraphManager %s",
key.c_str(), parallel_num.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is invalid argument", parallel_num.c_str(), key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (std::out_of_range &) {
REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is out of range, check when GraphManager %s",
key.c_str(), parallel_num.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is out of range", parallel_num.c_str(), key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (...) {
REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s is invalid argument, check when GraphManager %s",
key.c_str(), parallel_num.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s is invalid argument", parallel_num.c_str(), key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}

if (num < 1) {
REPORT_INNER_ERROR("E19999", "Option:%s, param parallel num:%s < 1, check invalid when GraphManager %s",
key.c_str(), parallel_num.c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_OPTIONS_INVALID, "parallel num : %s of %s must bigger than 0", parallel_num.c_str(), key.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
@@ -1733,6 +1843,8 @@ Status GraphManager::GetGraphNode(const GraphId &graph_id, GraphNodePtr &out) {
auto iter = graph_map_.find(graph_id);
if (iter == graph_map_.end()) {
out = nullptr;
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] graph not exist, graph_id= %u.", graph_id);
return GE_GRAPH_GRAPH_NOT_EXIST;
}
@@ -1753,6 +1865,8 @@ Status GraphManager::SummaryHandle(const GraphId &graph_id, std::vector<GeTensor
const std::map<uint32_t, std::map<string, size_t>> &whole_summary_output_indexes =
GetCompilerStages(graph_id).optimizer.GetSummaryOutputIndexes();
if (whole_summary_output_indexes.find(graph_id) == whole_summary_output_indexes.end()) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in whole_summary_output_indexes, check invalid "
"when GraphManager %s", graph_id, __FUNCTION__);
GELOGE(FAILED, "No Summary graph found in map.");
return FAILED;
}
@@ -1798,6 +1912,8 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap
}
}
if (netoutput == nullptr) {
REPORT_INNER_ERROR("E19999", "No netoutput node in graph:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "Netoutput is null.");
return FAILED;
}
@@ -1805,6 +1921,9 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap
std::string desc_name;
auto out_anchor = in->GetPeerOutAnchor();
// The input anchor has no peer output anchor: report and abort.
if (out_anchor == nullptr) {
  // GetIdx() yields a signed index, so it is formatted with %d rather than %u.
  REPORT_INNER_ERROR("E19999", "Peer anchor of op:%s(%s), in_index:%d is nullptr, graph_id:%u, check invalid "
                     "when GraphManager %s", netoutput->GetName().c_str(), netoutput->GetType().c_str(),
                     in->GetIdx(), graph_id, __FUNCTION__);
  GELOGE(FAILED, "out_anchor is null.");
  return FAILED;
}
@@ -1812,6 +1931,9 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap
// find the variable node in graph
while (peer_node != nullptr && peer_node->GetType() != kVariable) {
if (peer_node->GetAllInDataAnchors().size() != 1) {
REPORT_INNER_ERROR("E19999", "More than one prior nodes of peer_node:%s(%s) in checkpoint Graph:%u, "
"check invalid when GraphManager %s",
peer_node->GetName().c_str(), peer_node->GetType().c_str(), graph_id, __FUNCTION__);
GELOGE(FAILED, "More than one prior nodes of peer_node %s in checkpoint Graph.", peer_node->GetName().c_str());
return FAILED;
}
@@ -1825,12 +1947,18 @@ Status GraphManager::CheckpointHandle(const GraphId &graph_id, const ComputeGrap
}
}
// No variable node was reached while walking back from this netoutput input.
if (peer_node == nullptr) {
  // GetIdx() yields a signed index, so it is formatted with %d rather than %u.
  REPORT_INNER_ERROR("E19999", "Peer anchor node of op:%s(%s), in_index:%d is nullptr, graph_id:%u, check invalid "
                     "when GraphManager %s", netoutput->GetName().c_str(), netoutput->GetType().c_str(),
                     in->GetIdx(), graph_id, __FUNCTION__);
  GELOGE(FAILED, "No variable op found in one branch, checkpoint graph illegal.");
  return FAILED;
}
desc_name = peer_node->GetName();
GELOGI("[GraphManager] CheckpointHandle, descName=%s.", desc_name.c_str());
// The input index must address an existing entry of outputs.
if (in->GetIdx() >= static_cast<int>(outputs.size())) {
  REPORT_INNER_ERROR("E19999", "in index:%d of op:%s(%s) is out of outputs.size:%zu range, graph_id:%u, "
                     "check invalid when GraphManager %s", in->GetIdx(), netoutput->GetName().c_str(),
                     netoutput->GetType().c_str(), outputs.size(), graph_id, __FUNCTION__);
  GELOGE(FAILED, "variable index out of range.");
  return FAILED;
}
@@ -1877,6 +2005,8 @@ Status GraphManager::PushSummaryData2ME(const GraphId &graph_id,
}
return iter->second(graph_id, tmp_summary_data);
}
REPORT_INNER_ERROR("E19999", "No summary callback found, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "[GraphManager] PushSummaryData2ME failed, not found summary callback.");
return FAILED;
}
@@ -1897,6 +2027,8 @@ Status GraphManager::PushSaveData2ME(const GraphId &graph_id, const std::map<std
}
return iter->second(graph_id, tmp_save_data);
}
REPORT_INNER_ERROR("E19999", "No checkpoint callback found, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(FAILED, "[GraphManager] PushSaveData2ME failed, not found checkpoint callback.");
return FAILED;
}
@@ -1925,6 +2057,8 @@ bool GraphManager::CheckVariableForCheckpointGraph(NodePtr &node) {
}
auto out = node->GetOutDataAnchor(0);
if (out == nullptr) {
REPORT_INNER_ERROR("E19999", "anchor index:0 of op:%s(%s) is nullptr, check invalid when GraphManager %s",
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(GE_GRAPH_PARAM_NULLPTR, "out is nullptr.");
return false;
}
@@ -1957,6 +2091,7 @@ static inline bool CheckConstanOpForCheckpointGraph(NodePtr &node) { return node

bool GraphManager::IsCheckpointGraph(ComputeGraphPtr &compute_graph) {
if (compute_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Param compute_graph is nullptr, check invalid when GraphManager %s", __FUNCTION__);
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[IsCheckpointGraph] computeGraph is nullptr.");
return false;
}
@@ -2091,6 +2226,8 @@ Status GraphManager::RemoveIsolatedConstInThisGraph(ge::ComputeGraphPtr &compute
if (n->GetOutAllNodes().empty() && n->GetInAllNodes().empty()) {
// it is an isolated constant, just remove it
if (GraphUtils::RemoveJustNode(compute_graph, n) != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Remove constant op:%s(%s) failed when GraphManager %s",
n->GetName().c_str(), n->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "remove constant %s failed.", n->GetName().c_str());
return FAILED;
}
@@ -2469,6 +2606,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
" Device[%u] free_memory_size[%ld]",
graph_node->GetGraphId(), memory_size, weight_size, GetContext().DeviceId(), free_memory);
if (ge::CheckInt64AddOverflow(memory_size, weight_size) != SUCCESS) {
REPORT_INNER_ERROR("E19999", "memory_size:%ld and weight_size:%ld will overflow after add, check invalid "
"when GraphManager %s", memory_size, weight_size, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "The sum of Memory size and weight size exceeds INT64_MAX");
return INTERNAL_ERROR;
}
@@ -2512,6 +2651,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
max_memory_size);
rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s",
GetContext().DeviceId(), __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
@@ -2526,6 +2667,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s",
GetContext().DeviceId(), __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
@@ -2555,10 +2698,14 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore");
GE_CHECK_NOTNULL(compute_graph_tmp);
// Tag the subgraph with the id of its root graph; abort if the attribute cannot be set.
if (!AttrUtils::SetInt(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_ID, root_graph_id)) {
  // The report text states the failure explicitly, matching the other "Set Attr ... fail" reports in this file.
  REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u fail when GraphManager %s", ATTR_NAME_ROOT_GRAPH_ID.c_str(),
                    compute_graph_tmp->GetGraphID(), __FUNCTION__);
  GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id);
  return FAILED;
}
if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) {
REPORT_CALL_ERROR("E19999", "Set Attr:%s to graph:%u, when GraphManager %s", ATTR_NAME_ROOT_GRAPH_NAME.c_str(),
compute_graph_tmp->GetGraphID(), __FUNCTION__);
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \
root_graph_name: %s.", root_graph_name.c_str());
return FAILED;
@@ -2578,6 +2725,8 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(),
pthread_self());
} else {
REPORT_INNER_ERROR("E19999", "Param sub_graph_info_ptr or graph_manager is nullptr when GraphManager %s",
__FUNCTION__);
GELOGE(FAILED, "graph_manager or sub_graph_info_ptr is nullptr");
return FAILED;
}
@@ -2791,10 +2940,16 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr>
}
GeAttrValue::INT index = 0;
if (!(AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, index))) {
REPORT_CALL_ERROR("E19999", "Get Attr:%s from op:%s(%s) fail when GraphManager %s", ATTR_NAME_INDEX.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "Get index from attr failed");
return PARAM_INVALID;
}
if (static_cast<size_t>(index) > input_tensor.size()) {
REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s) value:%ld > param input_tensor.size:%zu, "
"check invalid when GraphManager %s", ATTR_NAME_INDEX.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
index, input_tensor.size(), __FUNCTION__);
GELOGE(PARAM_INVALID, "The count of input tensor should be equal to the count of data.");
return PARAM_INVALID;
}
@@ -2942,6 +3097,8 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_
std::vector<ge::OutputTensorInfo> outputs;
auto compute_graph = GraphUtils::GetComputeGraph(*graph_node->GetGraph());
if (graph_manager == nullptr || compute_graph == nullptr) {
REPORT_INNER_ERROR("E19999", "Param graph_manager or compute_graph in graph_node is nullptr, "
"check invalid when GraphManager %s", __FUNCTION__);
GELOGE(GRAPH_FAILED, "[Analyze Mode] compute graph is null!");
callback(GRAPH_FAILED, outputs);
return;
@@ -2961,6 +3118,9 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_
len = input_desc->GetShape().GetShapeSize();
}
if (len < 0) {
REPORT_INNER_ERROR("E19999", "InputIndex:%zu ShapeSize:%ld of op:%s(%s) < 0, unknown shape is not support, "
"check invalid when GraphManager %s", i, len,
node->GetName().c_str(), node->GetType().c_str(), __FUNCTION__);
GELOGE(GRAPH_FAILED, "Analyze Mode does not support GEOP output unknown shape!");
callback(GRAPH_FAILED, outputs);
return;
@@ -2970,12 +3130,20 @@ void GraphManager::ReturnError(GraphManager *graph_manager, GraphNodePtr &graph_
}
auto size = GetSizeByDataType(input_desc->GetDataType());
if (size <= 0) {
REPORT_INNER_ERROR("E19999", "data_type:%s of op:%s(%s) is not support, input_index:%zu check invalid "
"when GraphManager %s",
ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(),
node->GetName().c_str(), node->GetType().c_str(), i, __FUNCTION__);
GELOGE(PARAM_INVALID, "Failed to get cube size, the data type %s is invalid",
ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str());
callback(GRAPH_FAILED, outputs);
return;
}
if (CheckInt64MulOverflow(len, static_cast<int64_t>(size)) != true) {
REPORT_INNER_ERROR("E19999", "shape_size:%ld of op:%s(%s) will overflow after multiply by "
"size:%u of data_type:%s, input_index:%zu, check invalid when GraphManager %s", len,
node->GetName().c_str(), node->GetType().c_str(), size,
ge::TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), i, __FUNCTION__);
GELOGE(MEMALLOC_FAILED, "int64 multiply happens overflow! a:%ld b:%d", len, size);
callback(GRAPH_FAILED, outputs);
return;
@@ -2998,11 +3166,15 @@ bool GraphManager::IsGraphNeedRebuild(uint32_t graph_id) {
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[RunGraph] graph not exist, graph_id=%u.", graph_id);
return true;
}

if (graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graphId=%u.", graph_id);
return true;
}
@@ -3017,11 +3189,15 @@ const map<std::string, std::string> *GraphManager::GetGraphOptions(uint32_t grap
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(ret, "[RunGraph] graph not exist, graph_id=%u.", graph_id);
return nullptr;
}

if (!graph_node) {
REPORT_INNER_ERROR("E19999", "Graph node is nullptr in graph_map, graph_id:%u, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NODE_NULL, "[RunGraph] graph node is NULL, graph_id=%u.", graph_id);
return nullptr;
}
@@ -3052,6 +3228,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
}
bool dynamic_shape_partitioned = false;
if (!AttrUtils::GetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s from graph:%u fail when GraphManager %s",
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(FAILED, "failed get dynamic shape partitioned flag on partitioned graph.");
return FAILED;
}
@@ -3109,6 +3287,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
if (AttrUtils::GetBool(compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
GELOGI("Compute graph %s get superkernel flag %d.", compute_graph->GetName().c_str(), off_superkernel);
if (!AttrUtils::SetBool(merged_compute_graph, ATTR_NAME_OFF_SUPERKERNEL_ATTR, off_superkernel)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail when GraphManager %s",
ATTR_NAME_OFF_SUPERKERNEL_ATTR.c_str(), compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(FAILED, "Compute graph %s set superkernel flag %d failed", merged_compute_graph->GetName().c_str(),
off_superkernel);
return FAILED;
@@ -3118,6 +3298,8 @@ Status GraphManager::OptimizeSubgraph(const GraphNodePtr &graph_node, ComputeGra
GE_DUMP(merged_compute_graph, "mergedComputeGraph");
compute_graph = merged_compute_graph;
if (!AttrUtils::SetBool(*compute_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, dynamic_shape_partitioned)) {
REPORT_INNER_ERROR("E19999", "Set Attr:%s to graph:%u fail when GraphManager %s",
ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED.c_str(), compute_graph->GetGraphID(), __FUNCTION__);
GELOGE(FAILED, "failed set dynamic shape partitioned flag on partitioned graph.");
return FAILED;
}
@@ -3231,6 +3413,8 @@ Status GraphManager::SaveVariables(const Graph &graph, const std::vector<std::st
if (!var_names.empty()) {
for (const auto &var_name : var_names) {
if (var_results.count(var_name) == 0) {
REPORT_INNER_ERROR("E19999", "Fetch Var:%s result value fail when GraphManager %s",
var_name.c_str(), __FUNCTION__);
GELOGE(FAILED, "Fetch var[%s] value failed.", var_name.c_str());
return FAILED;
} else {
@@ -3269,6 +3453,9 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector<
auto peer_node = out_anchor->GetOwnerNode();
while (peer_node->GetType() != VARIABLE) {
if (peer_node->GetAllInDataAnchors().size() != 1) {
REPORT_INNER_ERROR("E19999", "peer node:%s(%s) of netoutput has more than 1 input in checkpoint Graph, "
"check invalid when GraphManager %s",
peer_node->GetName().c_str(), peer_node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, "peer_node [%s] has more than 1 input in checkpoint Graph.", peer_node->GetName().c_str());
return FAILED;
}
@@ -3282,12 +3469,17 @@ Status GraphManager::SaveCheckPointResult(const Graph &graph, const std::vector<
}
}
if (peer_node->GetType() != VARIABLE) {
REPORT_INNER_ERROR("E19999", "peer node:%s(%s) of netoutput is not variable in checkpoint Graph, "
"check invalid when GraphManager %s",
peer_node->GetName().c_str(), peer_node->GetType().c_str(), __FUNCTION__);
GELOGE(FAILED, " peer_node %s is not variable in checkpoint Graph.", peer_node->GetName().c_str());
return FAILED;
}
auto var_name = peer_node->GetName();
GELOGI("[GraphManager] SaveVariables, varName is %s.", var_name.c_str());
// The netoutput input index must address an existing entry of outputs.
if (in->GetIdx() >= static_cast<int>(outputs.size())) {
  // GetIdx() is signed; use %d here as the GELOGE below already does.
  REPORT_INNER_ERROR("E19999", "In index:%d of netoutput is out of outputs.size:%zu range in checkpoint Graph, "
                     "check invalid when GraphManager %s", in->GetIdx(), outputs.size(), __FUNCTION__);
  GELOGE(FAILED, "variable index[%d] out of range[%zu].", in->GetIdx(), outputs.size());
  return FAILED;
}


+ 4
- 0
ge/graph/manager/util/debug.cc View File

@@ -63,12 +63,16 @@ Status Debug::DumpDevMem(const char *file, const void *addr, int64_t size) {
// Allocate a host buffer of `size` bytes to receive the device memory contents.
uint8_t *host_addr = nullptr;
rtError_t ret = rtMallocHost(reinterpret_cast<void **>(&host_addr), size);
if (ret != RT_ERROR_NONE) {
  // `size` is int64_t, so it is formatted with %ld (not %zu, which expects size_t).
  REPORT_CALL_ERROR("E19999", "Call rtMallocHost failed, size:%ld, ret: 0x%X when Debug %s",
                    size, ret, __FUNCTION__);
  GELOGE(FAILED, "Call rt api rtMallocHost failed, ret: 0x%X", ret);
  return FAILED;
}
GE_MAKE_GUARD_RTMEM(host_addr);
// Copy `size` bytes from the device address into the host buffer.
ret = rtMemcpy(host_addr, size, addr, size, RT_MEMCPY_DEVICE_TO_HOST);
if (ret != RT_ERROR_NONE) {
  REPORT_CALL_ERROR("E19999", "Call rtMemcpy failed, size:%ld, ret: 0x%X when Debug %s",
                    size, ret, __FUNCTION__);
  GELOGE(FAILED, "Call rt api rtMemcpy failed, ret: 0x%X", ret);
  return FAILED;
}


+ 37
- 2
ge/graph/manager/util/hcom_util.cc View File

@@ -40,6 +40,9 @@ Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc,
if (op_desc->GetType() == HCOMRECEIVE) {
bool ret = ge::AttrUtils::GetDataType(op_desc, HCOM_ATTR_DATA_TYPE, src_data_type);
if (ret == false) {
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when HcomOmeUtil %s",
HCOM_ATTR_DATA_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "op:HcomReceive, op desc no attr: dtype.");
return PARAM_INVALID;
}
@@ -51,6 +54,10 @@ Status HcomOmeUtil::GetHcclDataType(const ge::ConstOpDescPtr &op_desc,

auto iter = kConstOpHcclDataType.find(static_cast<int64_t>(src_data_type));
if (iter == kConstOpHcclDataType.end()) {
REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value data_type:%s, not support in kConstOpHcclDataType now, "
"check invalid when HcomOmeUtil %s", HCOM_ATTR_DATA_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
ge::TypeUtils::DataTypeToSerialString(src_data_type).c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID,
"HcomOmeUtil:: Node: %s Optype: %s HcomDataType cann't support! Current Davinci Data Type : %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(),
@@ -76,6 +83,8 @@ Status HcomOmeUtil::GetHcomCount(const ge::ConstOpDescPtr &op_desc, HcclDataType
int &count) {
GE_CHECK_NOTNULL(op_desc);
if (!IsHCOMOp(op_desc->GetType())) {
REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not hcom op, check invalid when HcomOmeUtil %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "HcomOmeUtil:: operator is not Hcom operator.");
return PARAM_INVALID;
}
@@ -142,6 +151,8 @@ Status HcomOmeUtil::GetHorovodCount(const ge::ConstOpDescPtr &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) {
GE_CHECK_NOTNULL(op_desc);
if (!IsHorovodOp(op_desc->GetType())) {
REPORT_INNER_ERROR("E19999", "Op:%s(%s) is not horovod op, check invalid when HcomOmeUtil %s",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "HcomOmeUtil:: operator is not Horovod operator.");
return PARAM_INVALID;
}
@@ -213,7 +224,11 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl

if (IsHCOMOp(op_desc->GetType())) {
std::string hcom_op_type;
GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(op_desc, HCOM_ATTR_REDUCE_TYPE, hcom_op_type), return PARAM_INVALID,
GE_CHK_BOOL_EXEC(ge::AttrUtils::GetStr(op_desc, HCOM_ATTR_REDUCE_TYPE, hcom_op_type),
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when HcomOmeUtil %s",
HCOM_ATTR_REDUCE_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
return PARAM_INVALID,
"HcomOmeUtil:: Node: %s Optype: %s Get HCOM_ATTR_REDUCE_TYPE fail, not support!",
op_desc->GetName().c_str(), op_desc->GetType().c_str());

@@ -226,6 +241,9 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl
} else if (hcom_op_type == "sum") {
op_type = HCCL_REDUCE_SUM;
} else {
REPORT_INNER_ERROR("E19999", "Attr:%s in Op:%s(%s), hcom_op_type value:%s is not support now, "
"check invalid when HcomOmeUtil %s", HCOM_ATTR_REDUCE_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), hcom_op_type.c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "HcomOmeUtil::Get HCOM_ATTR_REDUCE_TYPE fail, [%s] not support!", hcom_op_type.c_str());
return PARAM_INVALID;
}
@@ -234,12 +252,18 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl
if (IsHorovodOp(op_desc->GetType())) {
int64_t horovod_op_type;
GE_CHK_BOOL_EXEC(ge::AttrUtils::GetInt(op_desc, ATTR_HOROVOD_ATTR_REDUCE_TYPE, horovod_op_type),
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when HcomOmeUtil %s",
ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
return PARAM_INVALID,
"HcomOmeUtil:: Node: %s Optype: %s Get ATTR_HOROVOD_ATTR_REDUCE_TYPE fail, not support!",
op_desc->GetName().c_str(), op_desc->GetType().c_str());

auto iter = kHorovodRedOpToHcclRedOp.find(static_cast<HorovodReduceOp>(horovod_op_type));
if (iter == kHorovodRedOpToHcclRedOp.end()) {
REPORT_INNER_ERROR("E19999", "Attr:%s in Op:%s(%s), horovod_op_type value:%ld is not support now, "
"check invalid when HcomOmeUtil %s", ATTR_HOROVOD_ATTR_REDUCE_TYPE.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), horovod_op_type, __FUNCTION__);
GELOGE(PARAM_INVALID, "HcomOmeUtil:: Node: %s Optype: %s HcomOpType cann't support! Current HcomOpType : %ld",
op_desc->GetName().c_str(), op_desc->GetType().c_str(), horovod_op_type);
return PARAM_INVALID;
@@ -252,7 +276,11 @@ Status HcomOmeUtil::GetHcclOperationType(const ge::ConstOpDescPtr &op_desc, Hccl

Status HcomOmeUtil::GetHcclRootId(const ge::ConstOpDescPtr &op_desc, int64_t &root_id) {
GE_CHECK_NOTNULL(op_desc);
GE_CHK_BOOL_EXEC(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_ROOT_RANK, root_id), return PARAM_INVALID,
GE_CHK_BOOL_EXEC(ge::AttrUtils::GetInt(op_desc, HCOM_ATTR_ROOT_RANK, root_id),
REPORT_INNER_ERROR("E19999", "Get Attr:%s in op:%s(%s) fail when HcomOmeUtil %s",
HCOM_ATTR_ROOT_RANK.c_str(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
return PARAM_INVALID,
"HcomOmeUtil::Node %s Optype: %s Get HCOM_ATTR_ROOT_INDEX fail, not support!",
op_desc->GetName().c_str(), op_desc->GetType().c_str());

@@ -293,6 +321,9 @@ Status HcomOmeUtil::CheckKernelHcclInfo(const ge::ConstOpDescPtr &op_desc,
std::vector<GETaskKernelHcclInfo> &kernel_hccl_infos) {
GE_CHECK_NOTNULL(op_desc);
// An hcom op must come with exactly one GETaskKernelHcclInfo entry.
if (IsHCOMOp(op_desc->GetType()) && kernel_hccl_infos.size() != 1) {
  // This branch is taken only when the op IS an hcom op and the count is wrong,
  // so the report says exactly that.
  REPORT_INNER_ERROR("E19999", "Op:%s(%s) is hcom op, but param kernel_hccl_infos.size:%zu != 1, "
                     "check invalid when HcomOmeUtil %s",
                     op_desc->GetName().c_str(), op_desc->GetType().c_str(), kernel_hccl_infos.size(), __FUNCTION__);
  GELOGE(PARAM_INVALID, "HcomOmeUtil:: in Hcom scenario, the number of GETaskKernelHcclInfo is invalid.");
  return PARAM_INVALID;
}
@@ -302,6 +333,10 @@ Status HcomOmeUtil::CheckKernelHcclInfo(const ge::ConstOpDescPtr &op_desc,
return SUCCESS;
}
if (kernel_hccl_infos.empty() || op_desc->GetInputsSize() != kernel_hccl_infos.size()) {
REPORT_INNER_ERROR("E19999", "Param kernel_hccl_infos.size:%zu is empty or not equal to input_desc size:%zu "
"in op:%s(%s), check invalid when HcomOmeUtil %s",
kernel_hccl_infos.size(), op_desc->GetInputsSize(),
op_desc->GetName().c_str(), op_desc->GetType().c_str(), __FUNCTION__);
GELOGE(PARAM_INVALID, "HcomOmeUtil:: in Horovod scenario, the number of GETaskKernelHcclInfo is invalid.");
return PARAM_INVALID;
}


+ 8
- 7
inc/framework/common/debug/log.h View File

@@ -232,13 +232,14 @@
}

// If expr is not RT_ERROR_NONE, print the log and return
#define GE_CHK_RT_RET(expr) \
do { \
rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
return RT_ERROR_TO_GE_STATUS(_rt_ret); \
} \
#define GE_CHK_RT_RET(expr) \
do { \
rtError_t _rt_ret = (expr); \
if (_rt_ret != RT_ERROR_NONE) { \
REPORT_CALL_ERROR("E19999", "Call %s fail, ret: 0x%X when %s", #expr, _rt_ret, __FUNCTION__); \
DOMI_LOGE("Call rt api failed, ret: 0x%X", _rt_ret); \
return RT_ERROR_TO_GE_STATUS(_rt_ret); \
} \
} while (0);

// If expr is true, execute exec_expr without printing logs


+ 1
- 1
parser

@@ -1 +1 @@
Subproject commit 0b1cd5d98d1f80c119c4aa251216d837f9f7c359
Subproject commit ca27d2a9797d8ebae36fb82b9970c042d2a445bc

Loading…
Cancel
Save