Browse Source

report errormsg

tags/v1.3.0
liudingyan 4 years ago
parent
commit
2252abcce8
31 changed files with 905 additions and 717 deletions
  1. +9
    -11
      ge/hybrid/common/npu_memory_allocator.cc
  2. +2
    -2
      ge/hybrid/common/tensor_value.cc
  3. +1
    -2
      ge/hybrid/executor/hybrid_execution_context.cc
  4. +35
    -41
      ge/hybrid/executor/hybrid_model_async_executor.cc
  5. +10
    -14
      ge/hybrid/executor/hybrid_model_pipeline_executor.cc
  6. +3
    -4
      ge/hybrid/executor/hybrid_profiler.cc
  7. +1
    -1
      ge/hybrid/executor/node_done_manager.cc
  8. +7
    -14
      ge/hybrid/executor/node_state.cc
  9. +2
    -3
      ge/hybrid/executor/rt_callback_manager.cc
  10. +12
    -14
      ge/hybrid/executor/subgraph_context.cc
  11. +52
    -56
      ge/hybrid/executor/subgraph_executor.cc
  12. +21
    -22
      ge/hybrid/executor/worker/execution_engine.cc
  13. +21
    -22
      ge/hybrid/executor/worker/shape_inference_engine.cc
  14. +47
    -15
      ge/hybrid/model/hybrid_model.cc
  15. +78
    -93
      ge/hybrid/model/hybrid_model_builder.cc
  16. +24
    -18
      ge/hybrid/model/node_item.cc
  17. +21
    -11
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  18. +121
    -57
      ge/hybrid/node_executor/aicore/aicore_op_task.cc
  19. +10
    -10
      ge/hybrid/node_executor/aicore/aicore_task_builder.cc
  20. +5
    -6
      ge/hybrid/node_executor/aicore/aicore_task_compiler.cc
  21. +90
    -33
      ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
  22. +135
    -79
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  23. +43
    -41
      ge/ir_build/atc_ir_common.cc
  24. +3
    -2
      ge/ir_build/ge_ir_build.cc
  25. +3
    -3
      ge/omm/csa_interact.cc
  26. +9
    -8
      ge/opskernel_manager/ops_kernel_builder_manager.cc
  27. +22
    -22
      ge/opskernel_manager/ops_kernel_manager.cc
  28. +39
    -34
      ge/session/inner_session.cc
  29. +70
    -69
      ge/session/session_manager.cc
  30. +5
    -5
      ge/single_op/single_op_model.cc
  31. +4
    -5
      ge/single_op/stream_resource.cc

+ 9
- 11
ge/hybrid/common/npu_memory_allocator.cc View File

@@ -41,8 +41,7 @@ NpuMemoryAllocator *NpuMemoryAllocator::GetAllocator() {
auto rt_result = rtGetDevice(&device_id);
if (rt_result != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[Get][Device] Failed, result:%d.", rt_result);
REPORT_INNER_ERROR("E19999", "rtGetDevice failed when NpuMemoryAllocator %s, result:%d.",
__FUNCTION__, rt_result);
REPORT_INNER_ERROR("E19999", "rtGetDevice failed, result:%d.", rt_result);
return nullptr;
}

@@ -62,8 +61,7 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
if (allocate_size == 0) {
GELOGE(MEMALLOC_FAILED, "[Check][Param:size_t]Memory size is 0, device_id = %u, size = %zu.",
device_id_, allocate_size);
REPORT_INNER_ERROR("E19999", "Memory size is 0, device_id = %u, size = %zu when %s.",
device_id_, allocate_size, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "Memory size is 0, device_id = %u, size = %zu.", device_id_, allocate_size);
return nullptr;
}

@@ -75,9 +73,9 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
} else {
if (allocate_size > kMaxHbmMemorySize) {
GELOGE(PARAM_INVALID, "[Check][Param:size_t]Invalid HBM memory size: %zu bigger than limit:%lu, check invalid.",
allocate_size, kMaxHbmMemorySize);
REPORT_CALL_ERROR("E19999", "Invalid HBM memory size: %zu bigger than limit:%lu, check invalid when %s.",
allocate_size, kMaxHbmMemorySize, __FUNCTION__);
allocate_size, kMaxHbmMemorySize);
REPORT_CALL_ERROR("E19999", "Invalid HBM memory size: %zu bigger than limit:%lu, check invalid.",
allocate_size, kMaxHbmMemorySize);
return nullptr;
}
void *try_reuse_addr = nullptr;
@@ -96,10 +94,10 @@ void *NpuMemoryAllocator::Allocate(std::size_t size, AllocationAttr *attr) {
.Malloc(allocate_size, reinterpret_cast<uint8_t *>(try_reuse_addr), device_id_);
}
if (buffer == nullptr) {
GELOGE(MEMALLOC_FAILED, "[Malloc][Memory] Failed, device_id = %u, size = %zu.",
device_id_, allocate_size);
REPORT_CALL_ERROR("E19999", "malloc memory failed, device_id = %u, size = %zu when %s.",
device_id_, allocate_size, __FUNCTION__);
GELOGE(MEMALLOC_FAILED, "[Malloc][Memory] Failed, device_id = %u, size = %zu",
device_id_, allocate_size);
REPORT_CALL_ERROR("E19999", "malloc memory failed, device_id = %u, size = %zu",
device_id_, allocate_size);
return nullptr;
}



+ 2
- 2
ge/hybrid/common/tensor_value.cc View File

@@ -33,7 +33,7 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(NpuMemoryAllocator *allocator

if (allocator == nullptr) {
GELOGE(INTERNAL_ERROR, "[Check][Param:NpuMemoryAllocator] allocator is NULL.");
REPORT_INNER_ERROR("E19999", "input allocator is NULL, when %s.", __FUNCTION__);
REPORT_INNER_ERROR("E19999", "input allocator is NULL.");
return nullptr;
}

@@ -44,7 +44,7 @@ std::unique_ptr<TensorBuffer> TensorBuffer::Create(NpuMemoryAllocator *allocator
buffer = allocator->Allocate(size, attr);
if (buffer == nullptr) {
GELOGE(MEMALLOC_FAILED, "[Allocate][Memory] Failed. size = %zu.", size);
REPORT_CALL_ERROR("E19999", "allocate failed, size = %zu, when %s.", size, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "allocate failed, size = %zu.", size);
return nullptr;
}



+ 1
- 2
ge/hybrid/executor/hybrid_execution_context.cc View File

@@ -60,8 +60,7 @@ Status GraphExecutionContext::Synchronize(rtStream_t rt_stream) {
}

GELOGE(RT_FAILED, "[Invoke][rtStreamSynchronize] failed, ret = %d", rt_ret);
REPORT_CALL_ERROR("E19999",
"invoke rtStreamSynchronize failed when GraphExecutionContext %s, ret = %d", __FUNCTION__, rt_ret);
REPORT_CALL_ERROR("E19999", "invoke rtStreamSynchronize failed, ret = %d", rt_ret);
return RT_FAILED;
}
} // namespace hybrid

+ 35
- 41
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -48,8 +48,7 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {

Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
if (data_inputer_->Push(data) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Data queue is full, please call again later when %s, model_id %u.",
__FUNCTION__, model_id_);
REPORT_CALL_ERROR("E19999", "Data queue is full, please call again later, model_id %u.", model_id_);
GELOGE(domi::DATA_QUEUE_ISFULL,
"[Push][Data] Data queue is full, please call again later, model_id %u ", model_id_);
return domi::DATA_QUEUE_ISFULL;
@@ -62,8 +61,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
GELOGD("HybridModelExecutor::Start IN, has listener = %d", listener != nullptr);
std::lock_guard<std::mutex> lk(mu_);
if (run_flag_) {
REPORT_INNER_ERROR("E19999",
"Model already started when HybridModelAsyncExecutor %s, model_id:%u.", __FUNCTION__, model_id_);
REPORT_INNER_ERROR("E19999", "Model already started, model_id:%u.", model_id_);
GELOGE(INTERNAL_ERROR, "[Check][RunState] Model already started, model_id:%u.", model_id_);
return INTERNAL_ERROR;
}
@@ -209,8 +207,7 @@ Status HybridModelAsyncExecutor::HandleResult(Status exec_ret,

if (exec_ret != SUCCESS) {
GELOGE(exec_ret, "[Check][Param:Status] failed to execute graph. model_id = %u", model_id_);
REPORT_INNER_ERROR("E19999",
"failed to execute graph when HybridModelAsyncExecutor %s. model_id = %u", __FUNCTION__, model_id_);
REPORT_INNER_ERROR("E19999", "failed to execute graph. model_id = %u", model_id_);
return OnComputeDone(data_id, INTERNAL_ERROR, output_tensor_info_list);
}

@@ -247,11 +244,10 @@ Status HybridModelAsyncExecutor::SyncVarData() {
Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, HybridModelExecutor::ExecuteArgs &args) {
if (current_data.blobs.size() < input_tensor_desc_.size()) {
GELOGE(PARAM_INVALID,
"[Check][Size]Blob size mismatches, expect at least %zu, but got %zu, model_id = %u",
input_tensor_desc_.size(), current_data.blobs.size(), model_id_);
REPORT_INNER_ERROR("E19999",
"Blob size mismatches, expect at least %zu, but got %zu when HybridModelAsyncExecutor %s, model_id = %u.",
input_tensor_desc_.size(), current_data.blobs.size(), __FUNCTION__, model_id_);
"[Check][Size]Blob size mismatches, expect at least %zu, but got %zu, model_id = %u",
input_tensor_desc_.size(), current_data.blobs.size(), model_id_);
REPORT_INNER_ERROR("E19999", "Blob size mismatches, expect at least %zu, but got %zu, model_id = %u.",
input_tensor_desc_.size(), current_data.blobs.size(), model_id_);
return PARAM_INVALID;
}

@@ -264,11 +260,10 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
if (is_input_dynamic_[input_index]) {
if (input_index >= current_data.shapes.size()) {
GELOGE(PARAM_INVALID,
"[Check][Range]Shape index out of range, index = %zu, shape size = %zu model_id = %u.",
input_index, current_data.shapes.size(), model_id_);
REPORT_INNER_ERROR("E19999",
"Shape index out of range, index = %zu, shape size = %zu when HybridModelAsyncExecutor %s, model_id = %u.",
input_index, current_data.shapes.size(), __FUNCTION__, model_id_);
"[Check][Range]Shape index out of range, index = %zu, shape size = %zu model_id = %u.",
input_index, current_data.shapes.size(), model_id_);
REPORT_INNER_ERROR("E19999", "Shape index out of range, index = %zu, shape size = %zu, model_id = %u.",
input_index, current_data.shapes.size(), model_id_);
return PARAM_INVALID;
}
auto &tensor_desc = input_tensor_desc_[input_index];
@@ -283,12 +278,12 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
}
// range[k].second can be -1
if (shape.GetDim(k) < range[k].first || (range[k].second >= 0 && shape.GetDim(k) > range[k].second)) {
GELOGE(PARAM_INVALID,
"[Check][Range]Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld], model_id = %u.",
input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
REPORT_INNER_ERROR("E19999",
"Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld, range = [%ld, %ld], model_id = %u.",
input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
GELOGE(PARAM_INVALID, "[Check][Range]Dim out of range, shape idx = %zu, dim idx = %zu,"
"dim = %ld, range = [%ld, %ld], model_id = %u.",
input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
REPORT_INNER_ERROR("E19999", "Dim out of range, shape idx = %zu, dim idx = %zu, dim = %ld,"
"range = [%ld, %ld], model_id = %u.",
input_index, k, shape.GetDim(k), range[k].first, range[k].second, model_id_);
return PARAM_INVALID;
}
}
@@ -296,8 +291,9 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy
args.input_desc[input_index] = tensor_desc;
GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str());
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size),
"[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size, index = %zu, shape = [%s], model_id = %u.",
input_index, tensor_desc->GetShape().ToString().c_str(), model_id_);
"[Invoke][GetTensorMemorySizeInBytes]Failed to calc tensor size,"
"index = %zu, shape = [%s], model_id = %u.",
input_index, tensor_desc->GetShape().ToString().c_str(), model_id_);
GELOGD("Input tensor[%zu] size = %zu", input_index, tensor_size);
}

@@ -316,11 +312,11 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData &current_data, Hy

if (mem_size < data_buf.length) {
REPORT_INNER_ERROR("E19999",
"input data size(%lu) does not match model required size(%lu) when %s, ret failed, model_id = %u.",
data_buf.length, mem_size, __FUNCTION__, model_id_);
"input data size(%lu) does not match model required size(%lu), ret failed, model_id = %u.",
data_buf.length, mem_size, model_id_);
GELOGE(PARAM_INVALID,
"[Check][Size]input data size(%lu) does not match model required size(%lu), ret failed, model_id = %u.",
data_buf.length, mem_size, model_id_);
"[Check][Size]input data size(%lu) does not match model required size(%lu), ret failed, model_id = %u.",
data_buf.length, mem_size, model_id_);
return PARAM_INVALID;
}
if (data_buf.length > 0) {
@@ -391,11 +387,11 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
std::vector<TensorValue> &output_tensors = args.outputs;
if (output_tensor_desc_list.size() != output_tensors.size()) {
GELOGE(INTERNAL_ERROR,
"[Check][Size]Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, model_id = %u.",
output_tensor_desc_list.size(), output_tensors.size(), model_id_);
REPORT_INNER_ERROR("E19999", "Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, "
"when HybridModelAsyncExecutor %s, model_id = %u.",
output_tensor_desc_list.size(), output_tensors.size(), __FUNCTION__, model_id_);
"[Check][Size]Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, model_id = %u.",
output_tensor_desc_list.size(), output_tensors.size(), model_id_);
REPORT_INNER_ERROR("E19999",
"Output sizes mismatch. From op_desc = %zu, and from output tensors = %zu, model_id = %u.",
output_tensor_desc_list.size(), output_tensors.size(), model_id_);
return INTERNAL_ERROR;
}

@@ -410,7 +406,7 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
tensor_desc->GetFormat(),
tensor_desc->GetDataType(),
output_size),
"Failed to calc tensor size for output[%zu]. shape = [%s], type = %s, format = %s",
"[Calc][TensorMemSize]Failed for output[%zu]. shape = [%s], type = %s, format = %s",
i,
tensor_desc->GetShape().ToString().c_str(),
TypeUtils::DataTypeToSerialString(tensor_desc->GetDataType()).c_str(),
@@ -427,12 +423,10 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
GE_CHECK_LE(output_size, UINT32_MAX);
if (output_tensor.GetSize() < static_cast<size_t>(output_size)) {
GELOGE(INTERNAL_ERROR,
"[Check][Size]output[%zu] tensor size(%zu) is not enough for output shape [%s], model_id = %u.",
i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_);
REPORT_INNER_ERROR("E19999",
"output[%zu] tensor size(%zu) is not enough for output shape [%s] model_id = %u,"
" when HybridModelAsyncExecutor %s.",
i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_, __FUNCTION__);
"[Check][Size]output[%zu] tensor size(%zu) is not enough for output shape [%s], model_id = %u.",
i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_);
REPORT_INNER_ERROR("E19999", "output[%zu] tensor size(%zu) is not enough for output shape [%s] model_id = %u",
i, output_tensor.GetSize(), tensor_desc->GetShape().ToString().c_str(), model_id_);
return INTERNAL_ERROR;
}

@@ -569,7 +563,7 @@ Status HybridModelAsyncExecutor::DumpOpDebug() {
}
data_dumper_.SetLoopAddr(global_step, loop_per_iter, loop_cond);
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(),
"[Invoke][LoadDumpInfo] failed in hybrid engine, model_id = %u.", model_id_);
"[Invoke][LoadDumpInfo] failed in hybrid engine, model_id = %u.", model_id_);
GELOGD("Dump op debug SUCCESS in hybrid engine");
}
return SUCCESS;


+ 10
- 14
ge/hybrid/executor/hybrid_model_pipeline_executor.cc View File

@@ -69,10 +69,8 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
task_queue_.Pop(task_info);
GELOGD("[Executor: %d] Got task, stage = %d, iteration = %ld", id_, task_info.stage, task_info.iteration);
if (task_info.iteration >= pipe_config_->iteration_end) {
GELOGE(INTERNAL_ERROR, "[Check][Range][Executor: %d] Unexpected iteration: %ld.",
id_, task_info.iteration);
REPORT_INNER_ERROR("E19999", "[Executor: %d] Unexpected iteration: %ld when StageExecutor %s.",
id_, task_info.iteration, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "[Check][Range][Executor: %d] Unexpected iteration: %ld.", id_, task_info.iteration);
REPORT_INNER_ERROR("E19999", "[Executor: %d] Unexpected iteration: %ld.", id_, task_info.iteration);
return INTERNAL_ERROR;
}

@@ -89,7 +87,7 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
if (task_info.stage == 0) {
GELOGD("[Executor: %d] To ResetExecutionContext", id_);
GE_CHK_STATUS_RET(ResetExecutionContext(context_),
"[Invoke][ResetExecutionContext][Executor: %d] Failed to reset context", id_);
"[Invoke][ResetExecutionContext][Executor: %d] Failed to reset context", id_);
context_.iteration = task_info.iteration;
GE_CHK_STATUS_RET_NOLOG(SetInputs(inputs, input_desc));
}
@@ -107,10 +105,10 @@ Status StageExecutor::Start(const std::vector<TensorValue> &inputs, const std::v
auto sync_result = Synchronize();
if (sync_result != SUCCESS) {
GELOGE(sync_result,
"[Invoke][Synchronize][Executor: %d] Failed to sync result:%d. iteration = %ld",
id_, sync_result, task_info.iteration);
REPORT_CALL_ERROR("E19999", "[Executor: %d] Failed to sync result:%d when StageExecutor %s. iteration = %ld",
id_, sync_result, __FUNCTION__, task_info.iteration);
"[Invoke][Synchronize][Executor: %d] Failed to sync result:%d. iteration = %ld",
id_, sync_result, task_info.iteration);
REPORT_CALL_ERROR("E19999", "[Executor: %d] Failed to sync result:%d. iteration = %ld",
id_, sync_result, task_info.iteration);
context_.profiler->Dump(std::cout);
context_.callback_manager->Destroy();
RuntimeInferenceContext::DestroyContext(std::to_string(context_.context_id));
@@ -260,8 +258,7 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar
auto ret = futures[i].get();
if (ret != SUCCESS) {
GELOGE(ret, "[Check][Result][Executor: %zu] Failed to schedule tasks.", i);
REPORT_INNER_ERROR("E19999", "[Executor: %zu] Failed to schedule tasks when HybridModelPipelineExecutor %s.",
i, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "[Executor: %zu] Failed to schedule tasks.", i);
has_error = true;
continue;
}
@@ -270,8 +267,7 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar

if (ret != SUCCESS) {
GELOGE(ret, "[Invoke][Synchronize] failed for [Executor: %zu].", i);
REPORT_CALL_ERROR("E19999", "[Executor: %zu] failed to Synchronize result when HybridModelPipelineExecutor %s.",
i, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "[Executor: %zu] failed to Synchronize result.", i);
has_error = true;
continue;
}
@@ -288,7 +284,7 @@ Status HybridModelPipelineExecutor::Execute(HybridModelExecutor::ExecuteArgs &ar

if (has_error) {
GELOGE(FAILED, "[Check][Error]Error occurred while execution.");
REPORT_INNER_ERROR("E19999", "Error occurred while execution when HybridModelPipelineExecutor %s.", __FUNCTION__);
REPORT_INNER_ERROR("E19999", "Error occurred while execution.");
return FAILED;
}



+ 3
- 4
ge/hybrid/executor/hybrid_profiler.cc View File

@@ -41,7 +41,7 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
char buf[kEventDescMax];
if (vsnprintf_s(buf, kEventDescMax, kEventDescMax - 1, fmt, args) == -1) {
GELOGE(FAILED, "[Parse][Param:fmt]Format %s failed.", fmt);
REPORT_CALL_ERROR("E19999", "Parse Format %s failed when HybridProfiler %s.", fmt, __FUNCTION__);
REPORT_CALL_ERROR("E19999", "Parse Format %s failed.", fmt);
va_end(args);
return;
}
@@ -50,9 +50,8 @@ void HybridProfiler::RecordEvent(EventType event_type, const char *fmt, ...) {
auto index = counter_++;
if (index >= static_cast<int>(events_.size())) {
GELOGE(INTERNAL_ERROR,
"[Check][Range]index out of range. index = %d, max event size = %zu", index, events_.size());
REPORT_INNER_ERROR("E19999", "index out of range when HybridProfiler %s. index = %d, max event size = %zu",
__FUNCTION__, index, events_.size());
"[Check][Range]index out of range. index = %d, max event size = %zu", index, events_.size());
REPORT_INNER_ERROR("E19999", "index out of range. index = %d, max event size = %zu", index, events_.size());
return;
}
auto &evt = events_[index];


+ 1
- 1
ge/hybrid/executor/node_done_manager.cc View File

@@ -29,7 +29,7 @@ bool NodeDoneManager::Cond::Await() {
std::chrono::seconds(kDefaultWaitTimeoutInSec),
[&]() { return is_released_ || is_cancelled_; })) {
GELOGE(INTERNAL_ERROR, "[Invoke][wait_for]Wait timed out.");
REPORT_INNER_ERROR("E19999", "wait timed out[%d] when %s.", kDefaultWaitTimeoutInSec, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "wait timed out[%d].", kDefaultWaitTimeoutInSec);
return false;
}



+ 7
- 14
ge/hybrid/executor/node_state.cc View File

@@ -67,10 +67,8 @@ Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target
Format format = input_desc.GetFormat();
DataType data_type = input_desc.GetDataType();
if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) {
GELOGE(FAILED, "[Invoke][CalcTensorMemSize] failed for [%s].",
node_item.NodeName().c_str());
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed for [%s] when ShapeInferenceState %s.",
node_item.NodeName().c_str(), __FUNCTION__);
GELOGE(FAILED, "[Invoke][CalcTensorMemSize] failed for [%s].", node_item.NodeName().c_str());
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed for [%s].", node_item.NodeName().c_str());
return FAILED;
}
}
@@ -124,19 +122,15 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
}

if (context.GetStatus() != SUCCESS) {
GELOGE(FAILED, "[Check][Status][%s] Await pending shape cancelled.",
node_item.NodeName().c_str());
REPORT_CALL_ERROR("E19999", "[%s] Await pending shape cancelled when %s.",
node_item.NodeName().c_str(), __FUNCTION__);
GELOGE(FAILED, "[Check][Status][%s] Await pending shape cancelled.", node_item.NodeName().c_str());
REPORT_CALL_ERROR("E19999", "[%s] Await pending shape cancelled.", node_item.NodeName().c_str());
break;
}
}

if (!wait_success) {
GELOGE(FAILED, "[Check][Status][%s] Wait for shape timeout:%d.",
node_item.NodeName().c_str(), kWaitInternal);
REPORT_CALL_ERROR("E19999", "[%s] Wait for shape timeout:%d when %s.",
node_item.NodeName().c_str(), kWaitInternal, __FUNCTION__);
GELOGE(FAILED, "[Check][Status][%s] Wait for shape timeout:%d.", node_item.NodeName().c_str(), kWaitInternal);
REPORT_CALL_ERROR("E19999", "[%s] Wait for shape timeout:%d.", node_item.NodeName().c_str(), kWaitInternal);
return FAILED;
}
}
@@ -240,8 +234,7 @@ Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const {
Status NodeState::WaitForPrepareDone() {
if (prepare_future_.valid()) {
GELOGD("[%s] Start to wait for prepare future.", GetName().c_str());
GE_CHK_STATUS_RET(prepare_future_.get(),
"[Check][Status][%s] PreRun failed.", GetName().c_str());
GE_CHK_STATUS_RET(prepare_future_.get(), "[Check][Status][%s] PreRun failed.", GetName().c_str());
}

return SUCCESS;


+ 2
- 3
ge/hybrid/executor/rt_callback_manager.cc View File

@@ -28,7 +28,7 @@ Status CallbackManager::RegisterCallback(rtStream_t stream, rtCallback_t callbac
auto rt_ret = rtEventRecord(event, stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[Invoke][rtEventRecord] failed, error code = %d", rt_ret);
REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed when %s, error code = %d", __FUNCTION__, rt_ret);
REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed, error code = %d", rt_ret);
(void) rtEventDestroy(event);
return RT_FAILED;
}
@@ -76,8 +76,7 @@ Status CallbackManager::CallbackProcess(rtContext_t context) {
auto rt_err = rtEventSynchronize(event);
if (rt_err != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[Invoke][rtEventSynchronize] failed. ret = %d", rt_err);
REPORT_CALL_ERROR("E19999",
"Invoke rtEventSynchronize failed when CallbackManager %s, ret = %d.", __FUNCTION__, rt_err);
REPORT_CALL_ERROR("E19999", "Invoke rtEventSynchronize failed, ret = %d.", rt_err);
GE_CHK_RT(rtEventDestroy(event));
return RT_FAILED;
}


+ 12
- 14
ge/hybrid/executor/subgraph_context.cc View File

@@ -50,11 +50,10 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) {
Status SubgraphContext::SetInput(int index, const TensorValue &tensor) {
if (static_cast<size_t>(index) >= all_inputs_.size()) {
GELOGE(INTERNAL_ERROR,
"[Check][Param:index]input index out of range. all input num = %zu, input index = %d",
all_inputs_.size(), index);
REPORT_INNER_ERROR("E19999",
"input param index out of range when SubgraphContext %s, all input num = %zu, input index = %d.",
__FUNCTION__, all_inputs_.size(), index);
"[Check][Param:index]input index out of range. all input num = %zu, input index = %d",
all_inputs_.size(), index);
REPORT_INNER_ERROR("E19999", "input param index out of range, all input num = %zu, input index = %d.",
all_inputs_.size(), index);
return INTERNAL_ERROR;
}
all_inputs_[index] = tensor;
@@ -69,12 +68,11 @@ Status SubgraphContext::SetInput(const NodeItem &node_item, int input_index, con
Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, const TensorValue &tensor) {
auto index = node_item.output_start + output_index;
if ((output_index >= node_item.num_outputs) || (static_cast<size_t>(index) >= all_outputs_.size())) {
GELOGE(INTERNAL_ERROR,
"[Check][Param:output_index]output index out of range. all output num = %zu, node_item = %s,"
"output index = %d.", all_outputs_.size(), node_item.DebugString().c_str(), output_index);
REPORT_INNER_ERROR("E19999", "output index out of range when SubgraphContext %s. "
"all output num = %zu, node_item = %s, output index = %d.",
__FUNCTION__, all_outputs_.size(), node_item.DebugString().c_str(), output_index);
GELOGE(INTERNAL_ERROR, "[Check][Param:output_index]output index out of range. all output num = %zu,"
"node_item = %s, output index = %d.",
all_outputs_.size(), node_item.DebugString().c_str(), output_index);
REPORT_INNER_ERROR("E19999", "output index out of range. all output num = %zu, node_item = %s, output index = %d.",
all_outputs_.size(), node_item.DebugString().c_str(), output_index);
return INTERNAL_ERROR;
}

@@ -130,9 +128,9 @@ Status SubgraphContext::Await(const NodePtr &node) {
// Reacts to a status raised while the subgraph is executing.
// Any status other than END_OF_SEQUENCE is written out through GELOGE and
// REPORT_INNER_ERROR, tagged with the graph item's name and the status value.
// In every case, END_OF_SEQUENCE included, node_done_manager_.Destroy() is called before returning.
// NOTE(review): this listing is a rendered diff without +/- markers, so both the removed and the
// added form of the GELOGE argument line and of the REPORT_INNER_ERROR call (with and without
// __FUNCTION__) appear below.
void SubgraphContext::OnError(Status error) {
// END_OF_SEQUENCE is the only status that skips logging and reporting.
if (error != END_OF_SEQUENCE) {
GELOGE(error, "[Check][Param:error][%s] Error:%d occurred while executing graph.",
graph_item_->GetName().c_str(), error);
REPORT_INNER_ERROR("E19999", "[%s] Error:%d occurred while executing graph when SubgraphContext %s.",
graph_item_->GetName().c_str(), error, __FUNCTION__);
graph_item_->GetName().c_str(), error);
REPORT_INNER_ERROR("E19999", "[%s] Error:%d occurred while executing graph.",
graph_item_->GetName().c_str(), error);
}
// Runs unconditionally, after any logging above.
node_done_manager_.Destroy();
}


+ 52
- 56
ge/hybrid/executor/subgraph_executor.cc View File

@@ -69,12 +69,11 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
auto input_nodes = graph_item_->GetInputNodes();
if (inputs.size() < input_nodes.size()) {
GELOGE(INTERNAL_ERROR,
"[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
"[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
REPORT_INNER_ERROR("E19999",
"[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs,"
"check invalid when SubgraphExecutor %s.",
graph_item_->GetName().c_str(), inputs.size(), input_nodes.size(), __FUNCTION__);
"[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
return INTERNAL_ERROR;
}

@@ -93,7 +92,8 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
input_tensor.DebugString().c_str());

GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
"[Invoke][SetInput] failed for grap_item[%s] input tensor[%zu]", graph_item_->GetName().c_str(), i);
"[Invoke][SetInput] failed for grap_item[%s] input tensor[%zu]",
graph_item_->GetName().c_str(), i);

if (force_infer_shape_ || input_node->is_dynamic) {
GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
@@ -115,13 +115,12 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
for (size_t i = 0; i < input_index_mapping.size(); ++i) {
auto &parent_input_index = input_index_mapping[i];
if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
GELOGE(INTERNAL_ERROR,
"[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
REPORT_INNER_ERROR("E19999",
"[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs,"
"check invalid when %s.",
graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1, __FUNCTION__);
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph"
"which needs at lease [%d] inputs", graph_item_->GetName().c_str(), inputs.size(),
parent_input_index + 1);
REPORT_INNER_ERROR("E19999", "[%s] Number of inputs [%zu] is not sufficient for subgraph"
"which needs at lease [%d] inputs",
graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
return INTERNAL_ERROR;
}

@@ -144,7 +143,7 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
GE_CHK_STATUS_RET(Init(inputs, input_desc), "[Invoke][Init]failed for [%s].", graph_item_->GetName().c_str());
if (!outputs.empty()) {
GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
"[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
"[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
}
if (!graph_item_->IsDynamic()) {
return ExecuteAsyncForKnownShape(inputs);
@@ -163,10 +162,10 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) {
GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str());
if (graph_item_->GetAllNodes().size() != 1) {
GELOGE(INTERNAL_ERROR,
"[%s] Invalid known shape subgraph. node size = %zu",
graph_item_->GetName().c_str(),
graph_item_->GetAllNodes().size());
REPORT_INNER_ERROR("E19999", "[%s] Invalid known shape subgraph. node size = %zu",
graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size());
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Invalid known shape subgraph. node size = %zu",
graph_item_->GetName().c_str(), graph_item_->GetAllNodes().size());
return INTERNAL_ERROR;
}

@@ -198,12 +197,12 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
input_desc.emplace_back(task_context.GetInputDesc(i));
}

GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc),
"[Invoke][ExecuteAsync] failed for [%s].", graph_item_->GetName().c_str());
GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc), "[Invoke][ExecuteAsync] failed for [%s].",
graph_item_->GetName().c_str());

GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context),
"[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
graph_item_->GetName().c_str());
"[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
graph_item_->GetName().c_str());
return SUCCESS;
}

@@ -243,7 +242,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
if (node_item.kernel_task == nullptr) {
GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
"[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
"[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
} else {
node_state->SetKernelTask(node_item.kernel_task);
}
@@ -253,8 +252,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
const auto &task = node_state->GetKernelTask();
if (task == nullptr) {
GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str());
REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s when %s, nodetask is null.",
node_state->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "GetKernelTask failed for %s, nodetask is null.", node_state->GetName().c_str());
return INTERNAL_ERROR;
}
auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -268,9 +266,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {
return SUCCESS;
}
GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.",
graph_item_->GetName().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes when %s.",
graph_item_->GetName().c_str(), __FUNCTION__);
graph_item_->GetName().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes.",
graph_item_->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -283,9 +281,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {

Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
"[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
"[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
"[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
"[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
return SUCCESS;
}

@@ -293,7 +291,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
auto &node_item = *node_state.GetNodeItem();
if (node_item.kernel_task == nullptr) {
GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx),
"[Invoke][Compile] Failed for node[%s]", node_state.GetName().c_str());
"[Invoke][Compile] Failed for node[%s]", node_state.GetName().c_str());
} else {
node_state.SetKernelTask(node_item.kernel_task);
}
@@ -302,8 +300,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
const auto &task = node_state.GetKernelTask();
if (task == nullptr) {
GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for[%s], NodeTask is null.", node_state.GetName().c_str());
REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null when %s.",
node_state.GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null.", node_state.GetName().c_str());
return INTERNAL_ERROR;
}
auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@@ -320,7 +317,7 @@ Status SubgraphExecutor::LaunchTasks() {
NodeState *node_state = nullptr;
if (!ready_queue_.Pop(node_state)) {
GELOGE(INTERNAL_ERROR, "[Invoke][Pop] failed for [%s].", graph_item_->GetName().c_str());
REPORT_CALL_ERROR("E19999", "invoke pop failed for %s when %s", graph_item_->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "invoke pop failed for %s.", graph_item_->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -345,7 +342,7 @@ Status SubgraphExecutor::LaunchTasks() {
GE_CHECK_NOTNULL(shared_task_context);
shared_task_context->SetForceInferShape(force_infer_shape_);
HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
"[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
"[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
}
}
@@ -370,8 +367,8 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
return ret;
}

GE_CHK_STATUS_RET(prepare_future.get(),
"[Invoke][get] [%s] Error occurred in task preparation.", graph_item_->GetName().c_str());
GE_CHK_STATUS_RET(prepare_future.get(), "[Invoke][get] [%s] Error occurred in task preparation.",
graph_item_->GetName().c_str());

GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
return SUCCESS;
@@ -386,14 +383,13 @@ Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<Co

// copy output data from op to designated position
GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc),
"[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());
"[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.",
graph_item_->GetName().c_str());
if (outputs.size() != output_desc.size()) {
GELOGE(INTERNAL_ERROR,
"[Check][Size]Number of outputs(%zu) mismatch number of output_desc(%zu).",
outputs.size(), output_desc.size());
REPORT_INNER_ERROR("E19999", "Number of outputs(%zu) mismatch number of output_desc(%zu),"
"check invlid when SubgraphExecutor %s.",
outputs.size(), output_desc.size(), __FUNCTION__);
GELOGE(INTERNAL_ERROR, "[Check][Size]Number of outputs(%zu) mismatch number of output_desc(%zu).",
outputs.size(), output_desc.size());
REPORT_INNER_ERROR("E19999", "Number of outputs(%zu) mismatch number of output_desc(%zu).",
outputs.size(), output_desc.size());
return INTERNAL_ERROR;
}
return SUCCESS;
@@ -410,18 +406,17 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
// get output tensors and tensor desc list
std::vector<TensorValue> outputs;
std::vector<ConstGeTensorDescPtr> output_desc_list;
GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs),
"[Invoke][GetOutputs][%s] Failed to get output tensors.", graph_item_->GetName().c_str());
GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs), "[Invoke][GetOutputs][%s] Failed to get output tensors.",
graph_item_->GetName().c_str());
GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list),
"[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());
"[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.",
graph_item_->GetName().c_str());

if (outputs.size() != output_desc_list.size()) {
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] num of output tensors = %zu, num of output tensor desc = %zu not equal",
graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
REPORT_INNER_ERROR("E19999",
"%s num of output tensors = %zu, num of output tensor desc = %zu not equal,"
"check invalid when SubgraphExecutor %s",
graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size(), __FUNCTION__);
graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
REPORT_INNER_ERROR("E19999", "%s num of output tensors = %zu, num of output tensor desc = %zu not equal",
graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
return INTERNAL_ERROR;
}

@@ -471,9 +466,9 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
// Op -> NetOutput, set the output tensor of the Op that outputs to the NetOutput node
if (outputs.size() != output_edges.size()) {
GELOGE(PARAM_INVALID, "[Check][Size]Output number mismatches, expect = %zu, but given = %zu",
output_edges.size(), outputs.size());
REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu when %s",
output_edges.size(), outputs.size(), __FUNCTION__);
output_edges.size(), outputs.size());
REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu",
output_edges.size(), outputs.size());
return PARAM_INVALID;
}

@@ -489,7 +484,8 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
output_tensor.DebugString().c_str());

GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
"[Invoke][SetOutput][%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i);
"[Invoke][SetOutput][%s] Failed to set input tensor[%zu]",
graph_item_->GetName().c_str(), i);
}

GELOGD("Done enabling zero copy for outputs successfully.");


+ 21
- 22
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -106,9 +106,9 @@ Status NodeDoneCallback::PrepareConstInputs(const NodeItem &node_item) {
node_item.NodeName().c_str(), output_idx, tensor_size,
output_tensor->DebugString().c_str());
REPORT_INNER_ERROR("E19999",
"[%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s when %s.",
node_item.NodeName().c_str(), output_idx, tensor_size,
output_tensor->DebugString().c_str(), __FUNCTION__);
"[%s] Tensor size is not enough. output index = %d, required size = %ld, tensor = %s.",
node_item.NodeName().c_str(), output_idx, tensor_size,
output_tensor->DebugString().c_str());
return INTERNAL_ERROR;
}

@@ -176,7 +176,7 @@ Status NodeDoneCallback::ProfilingReport() {
auto node = context_->GetNodeItem().node;
if (node == nullptr) {
GELOGE(PARAM_INVALID, "[Get][Node] value is nullptr.");
REPORT_INNER_ERROR("E19999", "Get node failed, when %s.", __FUNCTION__);
REPORT_INNER_ERROR("E19999", "TaskContext GetNodeItem value is nullptr.");
return PARAM_INVALID;
}

@@ -194,7 +194,7 @@ Status NodeDoneCallback::ProfilingReport() {
auto profiling_ret = GetTaskDescInfo(node, model, task_desc_info);
if (profiling_ret != RT_ERROR_NONE) {
GELOGE(profiling_ret, "[Get][TaskDescInfo] of node:%s failed.", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "GetTaskDescInfo of node:%s failed, when %s.", node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "GetTaskDescInfo of node:%s failed.", node->GetName().c_str());
return profiling_ret;
}

@@ -207,7 +207,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
auto node = context_->GetNodeItem().node;
if (node == nullptr) {
GELOGE(PARAM_INVALID, "[Get][Node] value is nullptr.");
REPORT_INNER_ERROR("E19999", "get node is nullptr when %s.", __FUNCTION__);
REPORT_INNER_ERROR("E19999", "get node value is nullptr.");
return PARAM_INVALID;
}
auto op_desc = node->GetOpDesc();
@@ -217,7 +217,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
std::string dynamic_model_name = model->GetModelName();
std::string dynamic_om_name = model->GetOmName();
uint32_t model_id = model->GetModelId();
if(!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str());
return SUCCESS;
}
@@ -260,7 +260,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
auto rt_ret = rtStreamSynchronize(stream);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "[Call][rtStreamSynchronize] failed, ret = %d.", rt_ret);
REPORT_CALL_ERROR("E19999", "call rtStreamSynchronize failed when %s, ret = %d.", __FUNCTION__, rt_ret);
REPORT_CALL_ERROR("E19999", "call rtStreamSynchronize failed, ret = %d.", rt_ret);
return rt_ret;
}
return SUCCESS;
@@ -279,8 +279,7 @@ Status NodeDoneCallback::OnNodeDone() {
}

if (ProfilingManager::Instance().ProfilingModelExecuteOn()) {
GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.",
node_item.NodeName().c_str());
GE_CHK_STATUS_RET(ProfilingReport(), "[Report][Profiling] of node[%s] failed.", node_item.NodeName().c_str());
}

// release workspace
@@ -302,8 +301,8 @@ Status NodeDoneCallback::OnNodeDone() {
(void) LogOutputs(node_item, *context_);
}

GE_CHK_STATUS_RET(context_->PropagateOutputs(),
"[Propagate][Outputs] of [%s] failed.", node_item.NodeName().c_str());
GE_CHK_STATUS_RET(context_->PropagateOutputs(), "[Propagate][Outputs] of [%s] failed.",
node_item.NodeName().c_str());

RECORD_CALLBACK_EVENT(graph_context_, context_->GetNodeName(), "[PropagateOutputs] End");
}
@@ -344,7 +343,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
const auto &task = node_state.GetKernelTask();
if (task == nullptr) {
GELOGE(INTERNAL_ERROR, "[Get][KernelTask] of [%s] is null.", node_state.GetName().c_str());
REPORT_INNER_ERROR("E19999", "GetKernelTask of %s is null when %s.", node_state.GetName().c_str(), __FUNCTION__);
REPORT_INNER_ERROR("E19999", "GetKernelTask of %s is null.", node_state.GetName().c_str());
return INTERNAL_ERROR;
}

@@ -358,8 +357,8 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
auto executor = node_item.node_executor;
GE_CHECK_NOTNULL(executor);
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] Start");
GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context),
"[Prepare][Task] for [%s] failed.", node_state.GetName().c_str());
GE_CHK_STATUS_RET(executor->PrepareTask(*task, task_context), "[Prepare][Task] for [%s] failed.",
node_state.GetName().c_str());
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PrepareTask] End");
GELOGD("[%s] Done task preparation successfully.", node_state.GetName().c_str());

@@ -371,7 +370,7 @@ Status ExecutionEngine::DoExecuteAsync(NodeState &node_state,
}

GE_CHK_STATUS_RET(ValidateInputTensors(node_state, task_context), "[Validate][InputTensors] for %s failed.",
node_state.GetName().c_str());
node_state.GetName().c_str());
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[ValidateInputTensors] End");

if (context.profiling_level > 0) {
@@ -425,10 +424,10 @@ Status ExecutionEngine::ValidateInputTensors(const NodeState &node_state, const
input_tensor->GetSize());
} else {
GELOGE(INTERNAL_ERROR,
"[Check][Size] for [%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu.",
task_context.GetNodeName(), i, expected_size, input_tensor->GetSize());
REPORT_INNER_ERROR("E19999", "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu when %s.",
task_context.GetNodeName(), i, expected_size, input_tensor->GetSize(), __FUNCTION__);
"[Check][Size] for [%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu.",
task_context.GetNodeName(), i, expected_size, input_tensor->GetSize());
REPORT_INNER_ERROR("E19999", "[%s] Input[%d]: tensor size mismatches. expected: %ld, but given %zu.",
task_context.GetNodeName(), i, expected_size, input_tensor->GetSize());
return INTERNAL_ERROR;
}
}
@@ -441,8 +440,8 @@ Status ExecutionEngine::PropagateOutputs(const NodeItem &node_item,
TaskContext &task_context,
GraphExecutionContext &context) {
if (node_item.shape_inference_type != DEPEND_COMPUTE) {
GE_CHK_STATUS_RET(task_context.PropagateOutputs(),
"[Propagate][Outputs] for [%s] failed.", node_item.NodeName().c_str());
GE_CHK_STATUS_RET(task_context.PropagateOutputs(), "[Propagate][Outputs] for [%s] failed.",
node_item.NodeName().c_str());
RECORD_EXECUTION_EVENT(&context, task_context.GetNodeName(), "[PropagateOutputs] End");
GELOGD("[%s] Done propagating outputs successfully.", node_item.NodeName().c_str());
}


+ 21
- 22
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -205,8 +205,7 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
auto peer_input_desc = peer_op_desc->MutableInputDesc(peer_anchor->GetIdx());
if (peer_input_desc == nullptr) {
GELOGE(GRAPH_FAILED, "[Call][MutableInputDesc] for %s return nullptr.", peer_op_desc->GetName().c_str());
REPORT_CALL_ERROR("E19999", "%s call MutableInputDesc return nullptr when ShapeInferenceEngine %s.",
peer_op_desc->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "%s call MutableInputDesc return nullptr.", peer_op_desc->GetName().c_str());
continue;
}

@@ -232,10 +231,10 @@ Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
if (tensor_shape.IsUnknownShape()) {
if (!fallback_with_range) {
GELOGE(INTERNAL_ERROR,
"[Is][UnknownShape] Output shape is still unknown after shape inference. shape = [%s].",
tensor_shape.ToString().c_str());
REPORT_INNER_ERROR("E19999", "Output shape is still unknown after shape inference. "
"shape = [%s] when ShapeInferenceEngine %s.", tensor_shape.ToString().c_str(), __FUNCTION__);
"[Is][UnknownShape] Output shape is still unknown after shape inference. shape = [%s].",
tensor_shape.ToString().c_str());
REPORT_INNER_ERROR("E19999", "Output shape is still unknown after shape inference. shape = [%s].",
tensor_shape.ToString().c_str());
return INTERNAL_ERROR;
}

@@ -244,9 +243,9 @@ Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
if (shape_range.size() != shape.size()) {
GELOGE(INTERNAL_ERROR, "[Check][Size] Number of shape ranges (%zu) mismatches that of dims (%zu).",
shape_range.size(), shape.size());
REPORT_INNER_ERROR("E19999", "Number of shape ranges (%zu) mismatches that of dims (%zu)"
" when ShapeInferenceEngine %s.", shape_range.size(), shape.size(), __FUNCTION__);
shape_range.size(), shape.size());
REPORT_INNER_ERROR("E19999", "Number of shape ranges (%zu) mismatches that of dims (%zu)",
shape_range.size(), shape.size());
return INTERNAL_ERROR;
}

@@ -271,23 +270,24 @@ Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
uint32_t type_size;
if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
GELOGE(INTERNAL_ERROR, "[Get][DataTypeLength] failed for type:%s.",
TypeUtils::DataTypeToSerialString(data_type).c_str());
REPORT_CALL_ERROR("E19999", "GetDataTypeLength failed for type:%s when ShapeInferenceEngine %s.",
TypeUtils::DataTypeToSerialString(data_type).c_str(), __FUNCTION__);
TypeUtils::DataTypeToSerialString(data_type).c_str());
REPORT_CALL_ERROR("E19999", "GetDataTypeLength failed for type:%s.",
TypeUtils::DataTypeToSerialString(data_type).c_str());
return INTERNAL_ERROR;
}

tensor_size = type_size;
for (const auto &dim : shape) {
GE_CHECK_GE(dim, 0);
GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
"[Check][Overflow] Shape size overflow, shape = [%s]", GeShape(shape).ToString().c_str());
GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim), "[Check][Overflow] Shape size overflow, shape = [%s]",
GeShape(shape).ToString().c_str());
tensor_size *= dim;
}

GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
"[Check][Overflow]Tensor size is too large: %ld, shape = [%s] Shape size will overflow when add align.",
tensor_size, GeShape(shape).ToString().c_str());
"[Check][Overflow]Tensor size is too large:%ld, shape = [%s]"
"Shape size will overflow when add align.",
tensor_size, GeShape(shape).ToString().c_str());
tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
return SUCCESS;
}
@@ -302,9 +302,8 @@ Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bo
auto dims = shape.GetDims();
auto status_result = CanonicalizeShape(*tensor_desc, dims, fallback_with_range);
if (status_result != SUCCESS) {
REPORT_CALL_ERROR("E19999",
"Invoke CanonicalizeShape failed when ShapeInferenceEngine %s, node:%s, output:%zu.",
node_item.NodeName().c_str(), __FUNCTION__, output_index);
REPORT_CALL_ERROR("E19999", "CanonicalizeShape failed, node:%s, output:%zu.",
node_item.NodeName().c_str(), output_index);
GELOGE(ge::FAILED, "[Canonicalize][Shape] failed for [%s], output %zu.",
node_item.NodeName().c_str(), output_index);
return status_result;
@@ -312,10 +311,10 @@ Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bo
int64_t tensor_size;
status_result = CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size);
if (status_result != SUCCESS) {
REPORT_CALL_ERROR("E19999", "Invoke CalcTensorSize failed when ShapeInferenceEngine %s, node:%s, output:%zu.",
node_item.NodeName().c_str(), __FUNCTION__, output_index);
REPORT_CALL_ERROR("E19999", "Invoke CalcTensorSize failed, node:%s, output:%zu.",
node_item.NodeName().c_str(), output_index);
GELOGE(ge::FAILED, "[Calc][TensorSize] failed for [%s], output %zu.",
node_item.NodeName().c_str(), output_index);
node_item.NodeName().c_str(), output_index);
return status_result;
}
GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);


+ 47
- 15
ge/hybrid/model/hybrid_model.cc View File

@@ -44,9 +44,9 @@ Status HybridModel::Init(bool is_single_op) {
GELOGD("Start to init hybrid model.");
is_single_op_ = is_single_op;
if (is_single_op) {
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model.");
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "[Build][HybridModel] for SingleOp failed.");
} else {
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model.");
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "[Build][HybridModel] failed.");
}
GELOGD("HybridModel initialized successfully.");
return SUCCESS;
@@ -106,7 +106,10 @@ const NodeItem *HybridModel::GetNodeItem(const NodePtr &node) const {
GeModelPtr HybridModel::GetGeModel(const NodePtr &node) const {
auto it = known_shape_sub_models_.find(node);
if (it == known_shape_sub_models_.end()) {
GELOGE(INTERNAL_ERROR, "[%s] Failed to get GeModel for subgraph node.", node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Check][Param:node][%s] Failed to get GeModel for subgraph node,"
"because node not in known_shape_sub_models_.", node->GetName().c_str());
REPORT_INNER_ERROR("E19999", "%s Failed to get GeModel for subgraph node,"
"because node not in known_shape_sub_models_.", node->GetName().c_str());
return nullptr;
}

@@ -130,7 +133,10 @@ const GraphItem *HybridModel::GetSubgraphItem(const std::string &graph_name) con

const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) const {
if (subgraph == nullptr) {
GELOGE(PARAM_INVALID, "subgraph is nullptr");
REPORT_INNER_ERROR("E19999", "Input param subgraph is nullptr, Graph:%s",
root_graph_item_->GetName().c_str());
GELOGE(PARAM_INVALID, "[Check][Param]subgraph is nullptr. graph:%s",
root_graph_item_->GetName().c_str());
return nullptr;
}

@@ -164,19 +170,27 @@ Status HybridModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_de
std::vector<uint32_t> &output_formats) {
auto node_item_list = root_graph_item_->GetInputNodes();
if (node_item_list.empty()) {
GELOGE(FAILED, "node item list is empty!");
REPORT_INNER_ERROR("E19999", "node item list is empty!, graph:%s",
root_graph_item_->GetName().c_str());
GELOGE(FAILED, "[Get][InputNodes]node item list is empty!, graph:%s",
root_graph_item_->GetName().c_str());
return FAILED;
}

GE_CHECK_NOTNULL(node_item_list[0]->node);
GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc());
if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) {
GELOGE(FAILED, "input size of op is not 1!");
REPORT_INNER_ERROR("E19999", "Input size of op is not 1, op:%s, type:%s",
node_item_list[0]->node->GetName().c_str(),
node_item_list[0]->node->GetType().c_str());
GELOGE(FAILED, "[Check][Size]input size of op is not 1! op:%s, type:%s",
node_item_list[0]->node->GetName().c_str(),
node_item_list[0]->node->GetType().c_str());
return FAILED;
}

GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get ouput desc info failed");
GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "[Get][InputDescInfo] failed.");
GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "[Get][OutputDescInfo] failed.");

return SUCCESS;
}
@@ -231,7 +245,14 @@ Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, st
GeShape shape = op_desc->GetInputDescPtr(0)->GetShape();
int64_t tensor_size = 0;
if (TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Calculate tensor mem size failed.");
GELOGE(FAILED, "[Calculate][TensorMemSize] failed input0 desc in node:%s."
"shape:%s, format:%s, datatype:%s.", op_desc->GetName().c_str(),
shape.ToString().c_str(), TypeUtils::FormatToSerialString(format).c_str(),
TypeUtils::DataTypeToSerialString(data_type).c_str());
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed for input0 desc in node:%s,"
"shape:%s, format:%s, datatype:%s", op_desc->GetName().c_str(),
shape.ToString().c_str(), TypeUtils::FormatToSerialString(format).c_str(),
TypeUtils::DataTypeToSerialString(data_type).c_str());
return FAILED;
}
if (tensor_size == kMemSizeUnknownShape) {
@@ -249,7 +270,10 @@ Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, st

void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc,
InputOutputDescInfo &output_desc_info, uint32_t &format_result) {
GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return );
GE_IF_BOOL_EXEC(output_desc == nullptr,
REPORT_INNER_ERROR("E19999", "param output_desc is nullptr, check invalid.");
GELOGE(FAILED, "[Check][Param:output_desc]output desc ptr is nullptr");
return );
Format format = output_desc->GetFormat();
GeShape shape = output_desc->GetShape();
std::vector<std::pair<int64_t,int64_t>> shape_ranges;
@@ -290,7 +314,9 @@ void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc,
Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) {
std::vector<ConstGeTensorDescPtr> output_desc_list;
// output_desc_list contains valid input desc
GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed");
GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list),
"[Invoke][GetOutputDescList]get output desc info failed, Graph:%s",
root_graph_item_->GetName().c_str());

vector<std::string> out_node_names;
(void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names);
@@ -300,8 +326,12 @@ Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,
GE_CHECK_NOTNULL(op_desc);

auto out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(),
FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size());
GE_IF_BOOL_EXEC(out_size != output_desc_list.size(),
REPORT_INNER_ERROR("E19999", "output size[%u] not match output_desc_list size[%zu]",
out_size, output_desc_list.size());
GELOGE(FAILED, "[Check][Size]output size[%u] not match output_desc_list size[%zu]",
out_size, output_desc_list.size());
return FAILED;);

for (uint32_t index = 0; index < out_size; ++index) {
string output_name;
@@ -329,7 +359,8 @@ Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,

TensorValue *HybridModel::GetConstant(const NodePtr &node) const {
if (node == nullptr) {
GELOGE(PARAM_INVALID, "Param is null");
GELOGE(PARAM_INVALID, "[Check][Param:node]node is null.");
REPORT_INNER_ERROR("E19999", "param node is null, check invalid.");
return nullptr;
}

@@ -347,7 +378,8 @@ TensorValue *HybridModel::GetConstant(const NodePtr &node) const {

TensorValue *HybridModel::GetTensor(const NodePtr &node) const {
if (node == nullptr) {
GELOGE(PARAM_INVALID, "Param is null");
GELOGE(PARAM_INVALID, "[Check][Param:node]node is null.");
REPORT_INNER_ERROR("E19999", "param node is null, check invalid.");
return nullptr;
}



+ 78
- 93
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -71,9 +71,10 @@ Status SetOutputNameAttr(ComputeGraph &graph) {
}
}
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names),
GELOGE(FAILED, "[Invoke][SetListStr] failed, name:%s.", ATTR_MODEL_OUT_NODES_NAME.c_str());
REPORT_CALL_ERROR("E19999", "SetListStr failed when %s, name:%s.",
__FUNCTION__, ATTR_MODEL_OUT_NODES_NAME.c_str());
GELOGE(FAILED, "[Invoke][SetListStr] failed, graph:%s name:%s.", graph.GetName().c_str(),
ATTR_MODEL_OUT_NODES_NAME.c_str());
REPORT_CALL_ERROR("E19999", "SetListStr failed, graph:%s name:%s.", graph.GetName().c_str(),
ATTR_MODEL_OUT_NODES_NAME.c_str());
return FAILED);
return SUCCESS;
}
@@ -110,13 +111,12 @@ Status CollectDependenciesForFusedGraph(NodeItem &node_item, std::set<OpDesc *>
auto src_op_desc = src_node->GetOpDesc();
GE_CHECK_NOTNULL(src_op_desc);
if (src_node->GetType() != DATA_TYPE) {
GELOGE(UNSUPPORTED,
"[Check][NodeType][%s::%s] Node in fused subgraph can only depend on Data nodes,"
"but depend on %s actually",
node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str());
GELOGE(UNSUPPORTED, "[Check][NodeType][%s::%s] Node in fused subgraph can only depend on Data nodes,"
"but depend on %s actually", node_item.NodeName().c_str(), node->GetName().c_str(),
src_node->GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s::%s] Node in fused subgraph can only depend on Data nodes,"
" but depend on %s actually, check invalid when %s.",
node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str(), __FUNCTION__);
"but depend on %s actually.", node_item.NodeName().c_str(), node->GetName().c_str(),
src_node->GetType().c_str());
return UNSUPPORTED;
}

@@ -134,17 +134,17 @@ HybridModelBuilder::HybridModelBuilder(HybridModel &hybrid_model)

Status HybridModelBuilder::Build() {
GE_CHK_STATUS_RET(ValidateParams(), "[Invoke][ValidateParams] failed, model_name_:[%s]", GetGraphName());
hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
GELOGI("[%s] Start to build hybrid model.", GetGraphName());
GE_CHK_STATUS_RET(InitRuntimeParams(), "[Invoke][InitRuntimeParams] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(),
"[Invoke][RecoverGraphUnknownFlag] failed, model_name_:[%s]", GetGraphName());
"[Invoke][RecoverGraphUnknownFlag] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(IndexSpecialNodes(), "[Invoke][IndexSpecialNodes] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(IndexTaskDefs(), "[Invoke][IndexTaskDefs] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(InitWeights(), "[Invoke][InitWeights] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(LoadGraph(), "[Invoke][LoadGraph] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(AssignUninitializedConstantOps(),
"[Invoke][AssignUninitializedConstantOps] failed, model_name_:[%s]", GetGraphName());
"[Invoke][AssignUninitializedConstantOps] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(TransAllVarData(), "[Invoke][TransAllVarData] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(CopyVarData(), "[Invoke][CopyVarData] failed, model_name_:[%s]", GetGraphName());
GE_CHK_STATUS_RET(InitModelMem(), "[Invoke][InitModelMem] failed, model_name_:[%s]", GetGraphName());
@@ -194,8 +194,7 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite
auto out_data_anchor = node->GetOutDataAnchor(i);
if (out_data_anchor == nullptr) {
GELOGE(INTERNAL_ERROR, "[Get][OutDataAnchor]out anchor[%d] of node %s is nullptr", i, node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "out anchor[%d] of node %s is nullptr when %s",
i, node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "out anchor[%d] of node %s is nullptr.", i, node->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -208,11 +207,10 @@ Status HybridModelBuilder::BuildNodeItem(const NodePtr &node, NodeItem &node_ite

NodeItem *dst_node_item = nullptr;
GE_CHK_STATUS_RET(GetOrCreateNodeItem(dst_node, &dst_node_item),
"[GetOrCreate][NodeItem] failed, dst_node:[%s].",
dst_node->GetName().c_str());
"[GetOrCreate][NodeItem] failed, dst_node:[%s].", dst_node->GetName().c_str());
int canonical_index;
GE_CHK_STATUS_RET(dst_node_item->GetCanonicalInputIndex(dst_in_anchor->GetIdx(), canonical_index),
"[Invoke][GetCanonicalInputIndex] failed, dst_node:[%s].", dst_node->GetName().c_str());
"[Invoke][GetCanonicalInputIndex] failed, dst_node:[%s].", dst_node->GetName().c_str());

node_item.outputs[i].emplace_back(canonical_index, dst_node_item);
}
@@ -341,9 +339,9 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
int input_index = node_item.op_desc->GetInputIndexByName(input_name);
if (input_index < 0) {
GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.",
node_item.NodeName().c_str(), input_name.c_str());
REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed when HybridModelBuilder %s, node:[%s] inputname: %s.",
__FUNCTION__, node_item.NodeName().c_str(), input_name.c_str());
node_item.NodeName().c_str(), input_name.c_str());
REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed, node:[%s] inputname: %s.",
node_item.NodeName().c_str(), input_name.c_str());
return INTERNAL_ERROR;
}

@@ -392,9 +390,9 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(INTERNAL_ERROR, "[Invoke][GetInt] failed, node:[%s] attr:[%s]",
op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "invoke GetInt failed when %s, node:[%s] attr:[%s]",
__FUNCTION__, op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "invoke GetInt failed, node:[%s] attr:[%s]",
op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return INTERNAL_ERROR;
}

@@ -425,8 +423,7 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, s
Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {
if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Invoke][SetAllAnchorStatus] failed, node:[%s].", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "[%s] NodeUtils::SetAllAnchorStatus failed when %s.",
node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "[%s] NodeUtils::SetAllAnchorStatus failed.", node->GetName().c_str());
return INTERNAL_ERROR;
}
for (auto &anchor : node->GetAllInDataAnchors()) {
@@ -434,23 +431,20 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {
if (peer_anchor == nullptr) {
if (AnchorUtils::SetStatus(anchor, ANCHOR_SUSPEND) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_SUSPEND, node:[%s].",
node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_SUSPEND, node:[%s] when HybridModelBuilder %s.",
node->GetName().c_str(), __FUNCTION__);
node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_SUSPEND, node:[%s].", node->GetName().c_str());
return INTERNAL_ERROR;
}
} else if (peer_anchor->GetOwnerNode()->GetType() == CONSTANT) {
if (AnchorUtils::SetStatus(anchor, ANCHOR_CONST) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_CONST, node:[%s].", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_CONST, node:[%s] when HybridModelBuilder %s.",
node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_CONST, node:[%s].", node->GetName().c_str());
return INTERNAL_ERROR;
}
} else {
if (AnchorUtils::SetStatus(anchor, ANCHOR_DATA) != GRAPH_SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Invoke][SetStatus] failed to set ANCHOR_DATA, node:[%s].", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_DATA, node:[%s] when HybridModelBuilder %s.",
node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "SetStatus failed to set ANCHOR_DATA, node:[%s].", node->GetName().c_str());
return INTERNAL_ERROR;
}
}
@@ -462,8 +456,9 @@ Status HybridModelBuilder::UpdateAnchorStatus(const NodePtr &node) {
Status HybridModelBuilder::DoUnlinkDataAnchors(const OutDataAnchorPtr &out_data_anchor,
const InDataAnchorPtr &in_data_anchor) {
GE_CHK_GRAPH_STATUS_RET(out_data_anchor->Unlink(in_data_anchor),
"[Invoke][Unlink] failed to unlink %s:%d from %s:%d", out_data_anchor->GetOwnerNode()->GetName().c_str(),
out_data_anchor->GetIdx(), in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx());
"[Invoke][Unlink] failed to unlink %s:%d from %s:%d",
out_data_anchor->GetOwnerNode()->GetName().c_str(), out_data_anchor->GetIdx(),
in_data_anchor->GetOwnerNode()->GetName().c_str(), in_data_anchor->GetIdx());

GELOGD("Succeeded in unlinking %s:%d from %s:%d",
out_data_anchor->GetOwnerNode()->GetName().c_str(),
@@ -507,9 +502,9 @@ Status HybridModelBuilder::MergeInputNodes(ComputeGraph &graph) {
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "[Invoke][GetInt] failed, node:[%s] attr:[%s]",
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "GetInt failed when %s, node:[%s] attr:[%s]",
__FUNCTION__, data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "GetInt failed, node:[%s] attr:[%s]",
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED;
}

@@ -576,9 +571,8 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
auto input_desc = net_output_desc->MutableInputDesc(index);
if (input_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "[Invoke][MutableInputDesc][%s] Failed to get input desc[%d]",
net_output_desc->GetName().c_str(), index);
REPORT_CALL_ERROR("E19999", "[%s] Failed to get input desc[%d] when HybridModelBuilder %s.",
net_output_desc->GetName().c_str(), index, __FUNCTION__);
net_output_desc->GetName().c_str(), index);
REPORT_CALL_ERROR("E19999", "[%s] Failed to get input desc[%d].", net_output_desc->GetName().c_str(), index);
return INTERNAL_ERROR;
}

@@ -660,7 +654,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeG

// invoke before adding subgraphs. in case modify node id in known-shaped subgraphs.
GE_CHK_GRAPH_STATUS_RET(merged_graph->TopologicalSorting(),
"[Invoke][TopologicalSorting]Failed to invoke TopologicalSorting on merged graph.");
"[Invoke][TopologicalSorting]Failed to invoke TopologicalSorting on merged graph.");
GE_DUMP(merged_graph, "hybrid_merged_graph_BeforeStageSort");
merged_graph->TopologicalSorting([](const NodePtr &a, const NodePtr &b) -> bool {
uint32_t a_level = UINT32_MAX;
@@ -780,7 +774,7 @@ Status HybridModelBuilder::LoadGraph() {
root_graph->GetDirectNodesSize(),
root_graph->GetAllNodesSize());
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph),
"[Invoke][UnfoldSubgraphs]Failed to unfold subgraphs, model_name_:%s.", GetGraphName());
"[Invoke][UnfoldSubgraphs]Failed to unfold subgraphs, model_name_:%s.", GetGraphName());
root_graph = std::move(merged_graph);
GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
root_graph->GetDirectNodesSize(),
@@ -803,10 +797,10 @@ Status HybridModelBuilder::LoadGraph() {
}
GE_DUMP(root_graph, "hybrid_merged_graph");
GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true),
"[Invoke][LoadDynamicSubgraph]Failed to load root graph, model_name_:%s.", GetGraphName());
"[Invoke][LoadDynamicSubgraph]Failed to load root graph, model_name_:%s.", GetGraphName());
GELOGD("Done loading root graph successfully.");
GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(),
"[Invoke][GroupNodes]Failed to group nodes for root graph, model_name_:%s.", GetGraphName());
"[Invoke][GroupNodes]Failed to group nodes for root graph, model_name_:%s.", GetGraphName());

for (auto &sub_graph : root_graph->GetAllSubgraphs()) {
GE_CHECK_NOTNULL(sub_graph);
@@ -842,8 +836,8 @@ Status HybridModelBuilder::LoadGraph() {
}

GE_CHK_STATUS_RET(ParseDependentByParallelGroup(),
"[Invoke][ParseDependentByParallelGroup]Failed to establish dependencies for hccl ops, model_name_:%s.",
GetGraphName());
"[Invoke][ParseDependentByParallelGroup]Failed to establish dependencies for hccl ops, "
"model_name_:%s.", GetGraphName());
GELOGI("Done loading all subgraphs successfully.");
return SUCCESS;
}
@@ -872,12 +866,10 @@ Status HybridModelBuilder::VarNodeToTensor(const NodePtr &var_node, std::unique_
}
uint8_t *dev_mem = var_manager_->GetVarMemoryAddr(var_logic, memory_type);
if (dev_mem == nullptr) {
GELOGE(INTERNAL_ERROR,
"[Invoke][GetVarMemoryAddr]Failed to copy var %s from device, cant not get var addr from logic addr %p",
var_node->GetName().c_str(), var_logic);
REPORT_CALL_ERROR("E19999",
"GetVarMemoryAddr failed when %s, Failed to copy var %s from device, cant not get var addr from logic addr %p",
__FUNCTION__, var_node->GetName().c_str(), var_logic);
GELOGE(INTERNAL_ERROR, "[Invoke][GetVarMemoryAddr]Failed to copy var %s from device, "
"cant not get var addr from logic addr %p", var_node->GetName().c_str(), var_logic);
REPORT_CALL_ERROR("E19999", "GetVarMemoryAddr failed, Failed to copy var %s from device, "
"cant not get var addr from logic addr %p", var_node->GetName().c_str(), var_logic);
return INTERNAL_ERROR;
}

@@ -905,7 +897,7 @@ Status HybridModelBuilder::HandleDtString(const GeTensor &tensor, void *var_addr
auto &mutable_tensor = const_cast<GeTensor &>(tensor);
uint64_t *buff = reinterpret_cast<uint64_t *>(mutable_tensor.MutableData().data());
GE_CHK_BOOL_RET_STATUS(ge::CheckInt64Uint32MulOverflow(elem_num, kBytes * kStringHeadElems) == SUCCESS, FAILED,
"[Invoke][CheckInt64Uint32MulOverflow] failed because Shape size is invalid.");
"[Invoke][CheckInt64Uint32MulOverflow] failed because Shape size is invalid.");
auto offset = static_cast<uint64_t>(elem_num * kBytes * kStringHeadElems);
auto hbm_raw_data_base_addr =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(var_addr) + offset);
@@ -971,7 +963,8 @@ Status HybridModelBuilder::InitConstantOps() {
GELOGD("Init tensor with host constant %s size = %zu", var_name.c_str(), aligned_tensor.MutableData().GetSize());
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(aligned_tensor.GetAlignedPtr(),
aligned_tensor.GetData().size()) == nullptr) {
GELOGE(MEMALLOC_FAILED, "[Malloc][HostMemory] for an existed GeTensor failed, model_name_:%s.", GetGraphName());
GELOGE(MEMALLOC_FAILED, "[Malloc][HostMemory] for an existed GeTensor failed, model_name_:%s.",
GetGraphName());
return MEMALLOC_FAILED;
}
var_tensor.reset(new(std::nothrow)TensorValue(aligned_tensor.MutableData().data(),
@@ -985,7 +978,8 @@ Status HybridModelBuilder::InitConstantOps() {
if (ge_tensor->GetData().size() > 0) {
GE_CHK_STATUS_RET_NOLOG(HandleDtString(*ge_tensor, v_output_addr));

GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%zu] datasize[%zu]",
GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] "
"mem_size[%zu] datasize[%zu]",
runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr, v_output_size,
ge_tensor->GetData().size());
GE_CHK_RT_RET(rtMemcpy(v_output_addr, v_output_size, ge_tensor->GetData().data(), ge_tensor->GetData().size(),
@@ -1020,10 +1014,9 @@ Status HybridModelBuilder::InitVariableTensors() {
GE_CHECK_NOTNULL(op_desc);
GeTensorDesc output_tensor = op_desc->GetOutputDesc(0);
int64_t tensor_size = 0;
if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(), output_tensor.GetDataType(),
tensor_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed when HybridModelBuilder %s, node name:%s",
__FUNCTION__, it.first.c_str());
if (TensorUtils::CalcTensorMemSize(output_tensor.GetShape(), output_tensor.GetFormat(),
output_tensor.GetDataType(), tensor_size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "CalcTensorMemSize failed, node name:%s", it.first.c_str());
GELOGE(INTERNAL_ERROR, "[Calculate][TensorMemSize] failed, node name:%s", it.first.c_str());
return INTERNAL_ERROR;
}
@@ -1034,8 +1027,8 @@ Status HybridModelBuilder::InitVariableTensors() {
}
if (MemManager::Instance().HostMemInstance(RT_MEMORY_HBM).Malloc(mem_info.host_aligned_ptr,
tensor_size) == nullptr) {
GELOGE(MEMALLOC_FAILED,
"[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].", it.first.c_str());
GELOGE(MEMALLOC_FAILED, "[Malloc][HostMem] for an existed GeTensor failed, Host variable [%s].",
it.first.c_str());
return MEMALLOC_FAILED;
}
GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size);
@@ -1087,8 +1080,7 @@ Status HybridModelBuilder::InitWeights() {
auto v_weights = ModelUtils::GetWeights(op_desc);
if (v_weights.empty()) {
GELOGE(INTERNAL_ERROR, "[Invoke][GetWeights][%s] Constant has no value", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "[%s] Constant has no value when %s.",
node->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "[%s] Constant has no value.", node->GetName().c_str());
return INTERNAL_ERROR;
}
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get());
@@ -1128,7 +1120,7 @@ Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
node_item.kernel_task);
if (load_ret != UNSUPPORTED && load_ret != SUCCESS) {
GELOGE(load_ret, "[Invoke][LoadTask][%s] Failed to load task", node_ptr->GetName().c_str());
REPORT_CALL_ERROR("E19999", "[%s] Failed to load task when %s", node_ptr->GetName().c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "[%s] Failed to load task", node_ptr->GetName().c_str());
return load_ret;
}

@@ -1215,7 +1207,7 @@ Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const
auto iter = node_map.find(op_index);
if (iter == node_map.end()) {
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by op_index = %u", op_index);
REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u when %s.", op_index, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "Failed to get node by op_index = %u.", op_index);
return INTERNAL_ERROR;
}

@@ -1286,7 +1278,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
auto iter = node_map.find(op_index);
if (iter == node_map.end()) {
GELOGE(INTERNAL_ERROR, "[Find][Node]Failed to get node by index = %u.", op_index);
REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u when %s.", op_index, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "Failed to get node by index = %u.", op_index);
return INTERNAL_ERROR;
}

@@ -1351,18 +1343,17 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GELOGD("To get peer node of %s::%s", sub_graph->GetName().c_str(), data_node->GetName().c_str());
auto wrapped_node = data_node->GetOwnerComputeGraph()->GetParentNode();
if (wrapped_node == nullptr) {
REPORT_INNER_ERROR("E19999", "[%s] Node is in root graph when HybridModelBuilder %s.",
data_node->GetName().c_str(), __FUNCTION__);
REPORT_INNER_ERROR("E19999", "[%s] Node is in root graph.", data_node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][GetParentNode][%s] Node is in root graph.", data_node->GetName().c_str());
return INTERNAL_ERROR;
}
auto data_op_desc = data_node->GetOpDesc();
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(data_op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
REPORT_CALL_ERROR("E19999", "[%s] Failed to get attr [%s] when HybridModelBuilder %s.",
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str(), __FUNCTION__);
REPORT_CALL_ERROR("E19999", "[%s] Failed to get attr [%s].", data_op_desc->GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][GetInt][%s] Failed to get attr [%s]",
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
data_op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return INTERNAL_ERROR;
}

@@ -1370,10 +1361,9 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GE_CHECK_NOTNULL(wrapped_node_in_anchor);
auto src_out_anchor = wrapped_node_in_anchor->GetPeerOutAnchor();
if (src_out_anchor == nullptr || src_out_anchor->GetOwnerNode() == nullptr) {
REPORT_INNER_ERROR("E19999", "[%s] Parent node do not have peer anchor when HybridModelBuilder %s.",
data_node->GetName().c_str(), __FUNCTION__);
REPORT_INNER_ERROR("E19999", "[%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
GELOGE(INTERNAL_ERROR,
"[Check][ParentNode][%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
"[Check][ParentNode][%s] Parent node do not have peer anchor.", data_node->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -1397,10 +1387,9 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
GE_CHECK_NOTNULL(src_graph);
auto src_net_output_node = src_graph->FindFirstNodeMatchType(NETOUTPUT);
if (src_net_output_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Failed to find NetOutput in subgraph: %s when HybridModelBuilder %s",
src_graph->GetName().c_str(), __FUNCTION__);
REPORT_INNER_ERROR("E19999", "Failed to find NetOutput in subgraph: %s", src_graph->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][FindFirstNodeMatchType]Failed to find NetOutput in subgraph: %s",
src_graph->GetName().c_str());
src_graph->GetName().c_str());
return INTERNAL_ERROR;
}
auto net_output_desc = src_net_output_node->GetOpDesc();
@@ -1438,10 +1427,10 @@ Status HybridModelBuilder::GetPeerNodeAcrossSubGraphs(const NodePtr &data_node,
}
}

GELOGE(FAILED, "[Get][PeerNode]Failed to find peer node for %s::%s",
sub_graph->GetName().c_str(), data_node->GetName().c_str());
REPORT_INNER_ERROR("E19999", "Failed to find peer node for %s::%s when %s.",
sub_graph->GetName().c_str(), data_node->GetName().c_str(), __FUNCTION__);
GELOGE(FAILED, "[Get][PeerNode]Failed to find peer node for %s::%s", sub_graph->GetName().c_str(),
data_node->GetName().c_str());
REPORT_INNER_ERROR("E19999", "Failed to find peer node for %s::%s.",
sub_graph->GetName().c_str(), data_node->GetName().c_str());
return FAILED;
}
Status HybridModelBuilder::InitRuntimeParams() {
@@ -1449,7 +1438,7 @@ Status HybridModelBuilder::InitRuntimeParams() {
bool ret = false;
if (ge_root_model_->GetSubgraphInstanceNameToModel().empty()) {
GELOGE(INTERNAL_ERROR, "[Get][SubModel]Root model has no sub model, model:%s.", GetGraphName());
REPORT_INNER_ERROR("E19999", "Root model has no sub model when %s, model:%s.", __FUNCTION__, GetGraphName());
REPORT_INNER_ERROR("E19999", "Root model has no sub model, model:%s.", GetGraphName());
return INTERNAL_ERROR;
}

@@ -1596,9 +1585,9 @@ Status HybridModelBuilder::GetParentNodeOutputIndex(const OpDesc &op_desc, int i
GE_CHECK_NOTNULL(input_desc);
if (!AttrUtils::GetInt(input_desc, ATTR_NAME_PARENT_NODE_INDEX, out_index)) {
GELOGE(INTERNAL_ERROR, "[Invoke][GetInt]NetOutput %s input tensor %d, attr %s not found.",
op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "NetOutput %s input tensor %d, attr %s not found when %s.",
op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str(), __FUNCTION__);
op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "NetOutput %s input tensor %d, attr %s not found.",
op_desc.GetName().c_str(), index, ATTR_NAME_PARENT_NODE_INDEX.c_str());
return INTERNAL_ERROR;
}
return SUCCESS;
@@ -1632,7 +1621,7 @@ Status HybridModelBuilder::TransAllVarData() {
rtError_t rt_ret = rtCtxGetCurrent(&ctx);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[Invoke][rtCtxGetCurrent]Failed to get current context, error_code is: 0x%X.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtCtxGetCurrent failed when %s, error_code: 0x%X.", __FUNCTION__, rt_ret);
REPORT_CALL_ERROR("E19999", "rtCtxGetCurrent failed, error_code: 0x%X.", rt_ret);
return RT_FAILED;
}

@@ -2055,9 +2044,9 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,
} else {
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, data_index)) {
GELOGE(FAILED, "[Invoke][GetInt][%s] Failed to get attr [%s]",
node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "call GetInt failed when HybridModelBuilder %s, [%s] Failed to get attr [%s]",
__FUNCTION__, node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "call GetInt failed, [%s] Failed to get attr [%s]",
node->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED;
}
}
@@ -2207,23 +2196,19 @@ Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() {
}
}
}

if (constant_node == nullptr) {
GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str());
continue;
}

if (converted[constant_node].count(output_idx) == 0) {
GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx),
"[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str());
converted[constant_node].emplace(output_idx);
}

src_node_item->to_const_output_id_list.erase(output_idx);
--ref_counts[src_node_item];
changed = true;
}

if (changed) {
std::vector<NodePtr> depends_to_keep;
for (auto &ref_count_it : ref_counts) {


+ 24
- 18
ge/hybrid/model/node_item.cc View File

@@ -36,10 +36,10 @@ std::set<std::string> kControlOpTypes{
Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgraph) {
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED,
"[%s] Failed to get attr [%s]",
op_desc.GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str());
GELOGE(FAILED, "[Invoke][GetInt][%s] Failed to get attr [%s]",
op_desc.GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "[%s] Failed to get attr [%s]",
op_desc.GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED;
}

@@ -58,10 +58,10 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgraph) {
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED,
"[%s] Failed to get attr [%s]",
op_desc->GetName().c_str(),
ATTR_NAME_PARENT_NODE_INDEX.c_str());
GELOGE(FAILED, "[Invoke][GetInt][%s] Failed to get attr [%s]",
op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
REPORT_CALL_ERROR("E19999", "[%s] Failed to get attr [%s].",
op_desc->GetName().c_str(), ATTR_NAME_PARENT_NODE_INDEX.c_str());
return FAILED;
}

@@ -122,7 +122,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
GE_CHECK_NOTNULL(node->GetOpDesc());
std::unique_ptr<NodeItem> instance(new(std::nothrow)NodeItem(node));
GE_CHECK_NOTNULL(instance);
GE_CHK_STATUS_RET(instance->Init(), "Failed to init NodeItem [%s] .", node->GetName().c_str());
GE_CHK_STATUS_RET(instance->Init(), "[Invoke][Init]Failed to init NodeItem [%s] .", node->GetName().c_str());
node_item = std::move(instance);
return SUCCESS;
}
@@ -171,7 +171,7 @@ Status NodeItem::ResolveDynamicState() {
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
"[%s] Failed to get shape status.",
"[Invoke][GetNodeUnknownShapeStatus][%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
@@ -241,7 +241,8 @@ Status NodeItem::Init() {
ResolveUnknownShapeType();
if (is_dynamic) {
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this),
"[Invoke][ParseFusedSubgraph][%s] Failed to parse fused subgraph", node_name.c_str());
}

return SUCCESS;
@@ -303,11 +304,10 @@ GeTensorDescPtr NodeItem::DoGetInputDesc(int index) const {
}

if (index < 0 || index >= num_inputs) {
GELOGE(PARAM_INVALID,
"[%s] Invalid input index, num inputs = %d, index = %d",
node_name.c_str(),
num_inputs,
index);
GELOGE(PARAM_INVALID, "[Check][Param:index][%s] Invalid input index, num inputs = %d, index = %d",
node_name.c_str(), num_inputs, index);
REPORT_INNER_ERROR("E19999", "Invalid input index, node:%s num inputs = %d, index = %d",
node_name.c_str(), num_inputs, index);
return nullptr;
}

@@ -356,7 +356,11 @@ Status NodeItem::GetCanonicalInputIndex(uint32_t index, int &canonical_index) co

auto iter = std::find(input_desc_indices_.begin(), input_desc_indices_.end(), index);
if (iter == input_desc_indices_.end()) {
GELOGE(INTERNAL_ERROR, "[%s] Invalid input index: %u", node_name.c_str(), index);
GELOGE(INTERNAL_ERROR,
"[Check][Param:index]input index:%u not in input_desc_indices_, check Invalid, node:%s",
index, node_name.c_str());
REPORT_INNER_ERROR("E19999", "input index:%u not in input_desc_indices_, check Invalid, node:%s",
index, node_name.c_str());
return INTERNAL_ERROR;
}

@@ -371,7 +375,9 @@ bool NodeItem::IsInputShapeStatic(int index) const {
}

if (static_cast<size_t>(index) >= is_input_shape_static_.size()) {
GELOGE(PARAM_INVALID, "Input index(%d) out of range: [0, %zu)", index, is_input_shape_static_.size());
GELOGE(PARAM_INVALID, "[Check][Param:index]Input index(%d) out of range: [0, %zu)",
index, is_input_shape_static_.size());
REPORT_INNER_ERROR("E19999", "Input index(%d) out of range: [0, %zu).", index, is_input_shape_static_.size());
return false;
}



+ 21
- 11
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -42,7 +42,7 @@ AiCoreNodeTask::AiCoreNodeTask(std::vector<std::unique_ptr<AiCoreOpTask>> &&task
Status AiCoreNodeExecutor::Initialize() {
compiler_ = TaskCompilerFactory::GetInstance().GetTaskCompiler();
if (compiler_ != nullptr) {
GE_CHK_STATUS_RET(compiler_->Initialize(), "Failed to init aicore task compiler.");
GE_CHK_STATUS_RET(compiler_->Initialize(), "[Init][TaskCompiler] failed.");
}
return SUCCESS;
}
@@ -60,8 +60,12 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod
node->GetName().c_str());
return SUCCESS;
} else {
GELOGE(FAILED, "Task_defs is empty for node (%s) which 'support_dynamicshape' is true, failed.",
GELOGE(FAILED, "[Invoke][GetBool]Task_defs is empty for node (%s) "
"which 'support_dynamicshape' is true, check invalid",
node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "Task_defs is empty for node (%s) "
"which 'support_dynamicshape' is true, check invalid",
node->GetName().c_str());
return FAILED;
}
}
@@ -69,7 +73,7 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod
AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs);
std::unique_ptr<AiCoreNodeTask> node_task;
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op),
"[%s] Failed to build op tasks.", node->GetName().c_str());
"[Invoke][BuildTask][%s] Failed to build op tasks.", node->GetName().c_str());
task = std::move(node_task);
GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str());
return SUCCESS;
@@ -105,7 +109,8 @@ bool AiCoreNodeTaskRegistry::AddTask(const std::string &node_key, const std::sha
std::lock_guard<std::mutex> lock(mutex_);
auto iter = reg_node_tasks_.find(node_key);
if (iter != reg_node_tasks_.end()) {
GELOGE(FAILED, "AiCoreNodeTaskRegistry(%s) AddTask failed, key already exist.", node_key.c_str());
GELOGE(FAILED, "[Add][Task] failed, key:%s already exist.", node_key.c_str());
REPORT_INNER_ERROR("E19999", "AddTask failed, key:%s already exist.", node_key.c_str());
return false;
}
auto ret = reg_node_tasks_.emplace(node_key, task);
@@ -131,13 +136,14 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,

auto ori_node_name = node->GetName();
if (compiler_ == nullptr) {
GELOGE(FAILED, "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str());
GELOGE(FAILED, "[Find][Compiler][%s] Can not find any valid aicore task compiler.", ori_node_name.c_str());
REPORT_INNER_ERROR("E19999", "[%s] Can not find any valid aicore task compiler.", ori_node_name.c_str());
return FAILED;
}

AiCoreNodeTaskRegistry &registry = AiCoreNodeTaskRegistry::GetInstance();
std::string shape_key;
GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "GenNodeKey failed, op name = %s.", node->GetName().c_str());
GE_CHK_STATUS_RET(GenNodeKey(node, shape_key), "[Generate][NodeKey] failed, op name = %s.", node->GetName().c_str());

auto node_key = std::to_string(model.GetModelId()) + "/" + shape_key;
GELOGD("NodeKey for %s = %s", node->GetName().c_str(), node_key.c_str());
@@ -152,19 +158,21 @@ Status AiCoreNodeExecutor::CompileTask(const HybridModel &model,

std::vector<domi::TaskDef> task_defs;
op_desc->SetName(ori_node_name + "_" + shape_key);
GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "Compile op(%s) failed.", ori_node_name.c_str());
GE_CHK_STATUS_RET(compiler_->CompileOp(node, task_defs), "[Compile][Op:%s] failed.", ori_node_name.c_str());
op_desc->SetName(ori_node_name);
GELOGD("successfully generated task_defs: %s", node->GetName().c_str());

AiCoreTaskBuilder builder(node->GetOpDesc(), task_defs);
std::unique_ptr<AiCoreNodeTask> node_task;
GE_CHK_STATUS_RET(builder.BuildTask(node_task, false), "[%s] Failed to build op tasks.", node->GetName().c_str());
GE_CHK_STATUS_RET(builder.BuildTask(node_task, false),
"[Invoke][BuildTask][%s] Failed to build op tasks.", node->GetName().c_str());
node_task->SetWorkspaceSizes(op_desc->GetWorkspaceBytes());
aicore_task = std::move(node_task);
GELOGD("successfully created node task: %s", node->GetName().c_str());

if (!registry.AddTask(node_key, aicore_task)) {
GELOGE(INTERNAL_ERROR, "Add NodeTask failed, op name = %s.", node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Add][NodeTask] failed, op name = %s.", node->GetName().c_str());
REPORT_CALL_ERROR("E19999", "add task failed, op name = %s.", node->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -196,7 +204,8 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
GELOGE(RT_FAILED, "[Invoke][rtGetTaskIdAndStreamID] failed, ret: 0x%X.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtGetTaskIdAndStreamID failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
@@ -271,7 +280,8 @@ Status AiCoreNodeTask::CheckOverflow(TaskContext &context) {
GELOGW("Dynamic shape op %s is over flow", context.GetNodeName());
return SUCCESS;
} else if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "rtstreamsynchronize failed");
GELOGE(rt_ret, "[Invoke][rtstreamsynchronize] failed, ret:%d.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtstreamsynchronize failed, ret:%d.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;


+ 121
- 57
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -95,7 +95,12 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
GELOGE(PARAM_INVALID, "[Check][JsonStr]Attr:%s in op:%s(%s), value:%s check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), op_desc_ptr->GetName().c_str(),
op_desc_ptr->GetType().c_str(), json_string.c_str());
REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value:%s check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), op_desc_ptr->GetName().c_str(),
op_desc_ptr->GetType().c_str(), json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
@@ -107,7 +112,8 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data),
GELOGI("Get original type of json_string"));
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
GE_IF_BOOL_EXEC(!meta_data.empty(),
GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);
} else {
GELOGI("TBE: find the binfile_key[%s] in HandleMap", stub_name_.c_str());
@@ -117,7 +123,8 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name),
GELOGI("Get original type of kernel_name"));
GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str());
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0));
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(),
stub_name_.c_str(), kernel_name.c_str(), 0));
}
return SUCCESS;
}
@@ -126,7 +133,9 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
TbeHandleRegistry &registry = TbeHandleRegistry::GetInstance();
auto tbe_kernel = op_desc.TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc.GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Invoke][TryGetExtAttr]TBE: %s can't find tvm bin file!",
op_desc.GetName().c_str());
REPORT_CALL_ERROR("E19999", "TBE: %s can't find tvm bin file.", op_desc.GetName().c_str());
return INTERNAL_ERROR;
}

@@ -143,7 +152,12 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
} else {
GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
GELOGE(PARAM_INVALID, "[Check][JsonStr]Attr:%s in op:%s(%s), value:%s check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), op_desc.GetName().c_str(),
op_desc.GetType().c_str(), json_string.c_str());
REPORT_INNER_ERROR("E19999", "Attr:%s in op:%s(%s), value:%s check invalid",
TVM_ATTR_NAME_MAGIC.c_str(), op_desc.GetName().c_str(),
op_desc.GetType().c_str(), json_string.c_str());
return PARAM_INVALID;
}
binary.version = 0;
@@ -154,11 +168,15 @@ Status AiCoreOpTask::RegisterKernelHandle(const OpDesc &op_desc) {
handle_ = bin_handle;
auto holder = std::unique_ptr<TbeHandleHolder>(new (std::nothrow) TbeHandleHolder(handle_));
if (holder == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create HandleHodler failed.");
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"[Create][TbeHandleHolder] failed, node name = %s", op_desc.GetName().c_str());
REPORT_CALL_ERROR("E19999", "create TbeHandleHolder failed, node name = %s.",
op_desc.GetName().c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
if (!registry.AddHandle(std::move(holder))) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Add handle failed. node name = %s", op_desc.GetName().c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Add][Handle] failed. node name = %s", op_desc.GetName().c_str());
REPORT_CALL_ERROR("E19999", "AddHandle failed, node name = %s.", op_desc.GetName().c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;
}
return SUCCESS;
@@ -176,39 +194,48 @@ Status AiCoreOpTask::InitWithKernelDef(const OpDesc &op_desc, const domi::TaskDe
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_def.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
GELOGE(INTERNAL_ERROR, "[Check][Size]args size:%zu of kernel_def is smaller than args_size_:%u, op:%s op_type:%s",
kernel_def.args().size(), args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "args size:%zu of kernel_def is smaller than args_size_:%u op:%s op_type:%s.",
kernel_def.args().size(), args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_def.args().data(), args_size_);
if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
// The tag names the object being updated (the args data); "[Update][Date]" was a typo.
GELOGE(INTERNAL_ERROR, "[Update][Data]AiCoreTask memcpy args failed, op:%s op_type:%s.",
op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "AiCoreTask memcpy args failed, op:%s op_type:%s.",
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

if (context.args_offset().size() < sizeof(uint16_t)) {
GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
// Adjacent string literals are concatenated verbatim, so the first fragment must end with
// its own separator; otherwise the log reads "args_offset,size:...".
GELOGE(INTERNAL_ERROR, "[Check][Size]Invalid args_offset, "
"size:%zu is smaller than size of uint16_t, op:%s op_type:%s",
context.args_offset().size(), op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "Invalid args_offset, size:%zu is smaller than size of uint16_t, op:%s op_type:%s",
context.args_offset().size(), op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
GELOGE(INTERNAL_ERROR,
"[%s] Arg offset out of range. offset = %u, arg size = %u",
GetName().c_str(),
offset,
args_size_);
// Adjacent string literals are concatenated verbatim, so each fragment carries its own
// trailing separator; without it the messages read "offset = %u,arg size" / "arg size = %uop:%s".
GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, "
"arg size = %u, op:%s op_type:%s", GetName().c_str(), offset, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u, "
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

arg_base_ = reinterpret_cast<uintptr_t *>(args_.get() + offset);
max_arg_count_ = (args_size_ - offset) / sizeof(void *);
GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, arg base = %p, arg size = %u",
op_desc.GetName().c_str(),
stub_name_.c_str(),
block_dim_,
arg_base_,
args_size_);
// The format string is split across two literals; the first keeps a trailing space so the
// concatenated text still reads "block_dim = %d, arg base = %p" as the single-line form did.
GELOGD("[%s] Done setting kernel args successfully. stub_func = %s, block_dim = %d, "
"arg base = %p, arg size = %u",
op_desc.GetName().c_str(), stub_name_.c_str(),
block_dim_, arg_base_, args_size_);
return SUCCESS;
}

@@ -225,29 +252,42 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do
args_.reset(new(std::nothrow) uint8_t[args_size_]);
GE_CHECK_NOTNULL(args_);
if (kernel_with_handle.args().size() < args_size_) {
GELOGE(INTERNAL_ERROR, "args size of kernel_def is smaller than args_size_");
GELOGE(INTERNAL_ERROR, "[Check][Size]args size:%zu of kernel_def is smaller than args_size_:%u. op:%s op_type:%s",
kernel_with_handle.args().size(), args_size_, op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "args size:%zu of kernel_def is smaller than args_size_:%u. op:%s op_type:%s",
kernel_with_handle.args().size(), args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}
errno_t err = memcpy_s(args_.get(), args_size_, kernel_with_handle.args().data(), args_size_);

if (err != EOK) {
GELOGE(INTERNAL_ERROR, "AiCoreTask memcpy args failed.");
// The tag names the object being updated (the args data); "[Update][Date]" was a typo.
GELOGE(INTERNAL_ERROR, "[Update][Data]AiCoreTask memcpy args failed. op:%s op_type:%s",
op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_CALL_ERROR("E19999", "AiCoreTask memcpy args failed. op:%s op_type:%s",
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

if (context.args_offset().size() < sizeof(uint16_t)) {
GELOGE(INTERNAL_ERROR, "Invalid args_offset, size = %zu.", context.args_offset().size());
// Adjacent string literals are concatenated verbatim; the first fragment ends with a space
// so the message reads "is smaller than size of uint16_t" instead of "is smallerthan ...".
GELOGE(INTERNAL_ERROR, "[Check][Size]Invalid args_offset, size:%zu is smaller "
"than size of uint16_t. op:%s op_type:%s", context.args_offset().size(),
op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "Invalid args_offset, size:%zu is smaller "
"than size of uint16_t. op:%s op_type:%s", context.args_offset().size(),
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

const auto *args_offset_buffer = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint32_t offset = *args_offset_buffer;
if (offset > args_size_) {
GELOGE(INTERNAL_ERROR,
"[%s] Arg offset out of range. offset = %u, arg size = %u",
GetName().c_str(),
offset,
args_size_);
// Adjacent string literals are concatenated verbatim; a separator is added after the
// arg size value so it is not fused with the following "op:" field ("%uop:%s").
GELOGE(INTERNAL_ERROR, "[Check][Offset][%s] Arg offset out of range. offset = %u, arg size = %u, "
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Arg offset out of range. offset = %u, arg size = %u, "
"op:%s op_type:%s", GetName().c_str(), offset, args_size_,
op_desc.GetName().c_str(), op_desc.GetType().c_str());
return INTERNAL_ERROR;
}

@@ -257,11 +297,16 @@ Status AiCoreOpTask::InitWithKernelDefWithHandle(const OpDesc &op_desc, const do
}

Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
"[%s] Failed to validate task def: [%s]",
op_desc.GetName().c_str(),
task_def.DebugString().c_str());

auto rt_ret = ValidateTaskDef(task_def);
if (rt_ret != SUCCESS) {
REPORT_CALL_ERROR("E19999", "op:%s(op_type:%s) failed to validate task def:%s",
op_desc.GetName().c_str(), op_desc.GetType().c_str(), task_def.DebugString().c_str());
GELOGE(rt_ret, "[Invoke][ValidateTaskDef]failed for op:%s(op_type:%s) to validate task def:%s",
op_desc.GetName().c_str(), op_desc.GetType().c_str(), task_def.DebugString().c_str());
return rt_ret;
}
if (task_def.type() != RT_MODEL_TASK_ALL_KERNEL) {
GE_CHK_STATUS_RET(InitWithKernelDef(op_desc, task_def));
} else {
@@ -273,14 +318,18 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
Status AiCoreOpTask::ValidateTaskDef(const domi::TaskDef &task_def) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
if (task_type != RT_MODEL_TASK_KERNEL && task_type != RT_MODEL_TASK_ALL_KERNEL) {
GELOGE(INTERNAL_ERROR, "Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
// Log the rejected task type and also report it as an inner error, matching the
// kernel-type check later in this function, which both logs and reports.
GELOGE(INTERNAL_ERROR,
"[Check][TaskType]Invalid task type (%d) in AiCore CreateTask.", static_cast<int>(task_type));
REPORT_INNER_ERROR("E19999", "Invalid task type (%d) in AiCore CreateTask.",
static_cast<int>(task_type));
return INTERNAL_ERROR;
}
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type != ccKernelType::TE) {
GELOGE(INTERNAL_ERROR, "Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
GELOGE(INTERNAL_ERROR,
"[Check][TaskType]Invalid kernel type(%d) in AiCore TaskDef.", static_cast<int>(kernel_type));
REPORT_INNER_ERROR("E19999", "Invalid kernel type(%d) in AiCore TaskDef.",
static_cast<int>(kernel_type));
return INTERNAL_ERROR;
}

@@ -324,13 +373,22 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
return SUCCESS;
}
if (tiling_buffer_ == nullptr) {
GELOGE(INTERNAL_ERROR, "tiling_buffer is nullptr while tiling_data is not empty!");
GELOGE(INTERNAL_ERROR, "[Check][Buffer] %s tiling_buffer is nullptr while tiling_data is not empty!",
op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999", "%s tiling_buffer is nullptr while tiling_data is not empty.",
op_desc->GetName().c_str());
return INTERNAL_ERROR;
}

if (tiling_data_.size() > tiling_buffer_->GetSize()) {
GELOGE(INTERNAL_ERROR, "[%s] Tiling data size now (%zu) shouldn't larger than we alloc before (%zu).",
stub_name_.c_str(), tiling_data_.size(), tiling_buffer_->GetSize());
// Adjacent string literals are concatenated verbatim; the first fragment ends with a space
// so the size value is not fused with the next word ("(%zu)shouldn't").
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] Tiling data size now (%zu) "
"shouldn't larger than we alloc before (%zu). op:%s op_type:%s",
stub_name_.c_str(), tiling_data_.size(), tiling_buffer_->GetSize(),
op_desc->GetName().c_str(), op_desc->GetType().c_str());
REPORT_INNER_ERROR("E19999", "[%s] Tiling data size now (%zu) "
"shouldn't larger than we alloc before (%zu). op:%s op_type:%s",
stub_name_.c_str(), tiling_data_.size(), tiling_buffer_->GetSize(),
op_desc->GetName().c_str(), op_desc->GetType().c_str());
return INTERNAL_ERROR;
}

@@ -347,24 +405,27 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
// Computes tiling data for `node` by calling OpParaCalculate, which receives `tiling_info`
// as its output argument. Debug logs bracket the call.
// Returns SUCCESS after the call; on failure GE_CHK_STATUS_RET presumably logs the message and
// returns the failing status (macro defined outside this view — TODO confirm).
// NOTE(review): this listing is a rendered diff, so both the removed and the added message
// line of the GE_CHK_STATUS_RET call appear below.
Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
GELOGD("[%s] Start to invoke OpParaCalculate.", node->GetName().c_str());
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.",
"[Invoke][OpParaCalculate]Failed calc tiling data of node %s.",
node->GetName().c_str());
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
return SUCCESS;
}

Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() + task_context.NumWorkspaces()
size_t expected_arg_count = task_context.NumInputs() + task_context.NumOutputs() +
task_context.NumWorkspaces()
- output_indices_to_skip_.size();
if (tiling_buffer_ != nullptr) {
++expected_arg_count;
}
if (expected_arg_count > max_arg_count_) {
GELOGE(INTERNAL_ERROR,
"[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
"[Check][arg_count][%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(),
max_arg_count_,
expected_arg_count);
REPORT_INNER_ERROR("E19999", "[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(), max_arg_count_, expected_arg_count);
return INTERNAL_ERROR;
}

@@ -378,7 +439,8 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) {
for (int i = 0; i < task_context.NumOutputs(); ++i) {
const auto output = task_context.GetOutput(i);
GE_CHECK_NOTNULL(output);
if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) != output_indices_to_skip_.end()) {
if (find(output_indices_to_skip_.begin(), output_indices_to_skip_.end(), i) !=
output_indices_to_skip_.end()) {
GELOGD("Node:%s output[%d] is an optional, the address don't need to be saved.",
task_context.GetNodeName(), i);
continue;
@@ -410,12 +472,12 @@ Status AiCoreOpTask::LaunchKernel(rtStream_t stream) {
if (handle_ != nullptr) {
std::string dev_func = original_kernel_key_ + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + std::to_string(tiling_key_);
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), args_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).", dev_func.c_str(),
block_dim_);
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle Start (dev_func = %s, block_dim = %u).",
dev_func.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(),
args_size_, nullptr, stream, kernel_info.c_str()));
GELOGD("AiCoreOpTask rtKernelLaunchWithHandle End (dev_func = %s, block_dim = %u).",
dev_func.c_str(), block_dim_);
} else {
GELOGD("AiCoreOpTask LaunchKernel Start (task = %s, block_dim = %u).", stub_name_.c_str(), block_dim_);
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), args_size_, nullptr, stream));
@@ -436,7 +498,8 @@ Status AiCoreOpTask::InitTilingInfo(const OpDesc &op_desc) {
(void) AttrUtils::GetInt(op_desc, GetKeyForOpParamSize(), max_size);
GELOGD("Got op param size by key: %s, ret = %ld", GetKeyForOpParamSize().c_str(), max_size);
if (max_size < 0) {
GELOGE(PARAM_INVALID, "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
GELOGE(PARAM_INVALID, "[Check][Size][%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
REPORT_INNER_ERROR("E19999", "[%s] Invalid op_param_size: %ld.", op_desc.GetName().c_str(), max_size);
return PARAM_INVALID;
}

@@ -494,8 +557,10 @@ Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc)
workspace_info = op_desc.TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, workspace_info);
if (atomic_output_indices.empty() && workspace_info.empty()) {
GELOGE(INTERNAL_ERROR,
"[%s] Neither ATOMIC_ATTR_OUTPUT_INDEX nor EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty.",
"[Check][Size][%s] ATOMIC_ATTR_OUTPUT_INDEX and EXT_ATTR_ATOMIC_WORKSPACE_INFO is empty. check invalid",
op_desc.GetName().c_str());
// Adjacent string literals are concatenated verbatim; the first fragment ends with a space
// so the attribute name is not fused with "is empty".
REPORT_INNER_ERROR("E19999", "[%s] ATOMIC_ATTR_OUTPUT_INDEX and EXT_ATTR_ATOMIC_WORKSPACE_INFO "
"is empty. check invalid", op_desc.GetName().c_str());
return INTERNAL_ERROR;
}

@@ -522,11 +587,10 @@ Status AtomicAddrCleanOpTask::InitAtomicAddrCleanIndices(const OpDesc &op_desc)
}

if (arg_count > max_arg_count_) {
GELOGE(INTERNAL_ERROR,
"[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(),
max_arg_count_,
arg_count);
// Adjacent string literals are concatenated verbatim; the first fragment keeps a trailing
// space so the log matches the reported message ("max arg count = %u, but expect = %zu").
GELOGE(INTERNAL_ERROR, "[Check][arg_count][%s] Invalid arg memory, max arg count = %u, "
"but expect = %zu", GetName().c_str(), max_arg_count_, arg_count);
REPORT_INNER_ERROR("E19999", "[%s] Invalid arg memory, max arg count = %u, but expect = %zu",
GetName().c_str(), max_arg_count_, arg_count);
return INTERNAL_ERROR;
}

@@ -556,7 +620,7 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co
Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.",
"[Invoke][OpAtomicCalculate]Failed calc tiling data of node %s.",
node->GetName().c_str());
GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str());
return SUCCESS;


+ 10
- 10
ge/hybrid/node_executor/aicore/aicore_task_builder.cc View File

@@ -42,10 +42,10 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
bool is_single_op) {
GE_CHECK_NOTNULL(op_desc_);
if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) {
GELOGE(INTERNAL_ERROR,
"[%s] At most 2 task was supported, but got %zu",
op_desc_->GetName().c_str(),
task_defs_.size());
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] At most %zu task was supported, but got %zu",
op_desc_->GetName().c_str(), kNumTaskWithAtomicAddrCleanTask, task_defs_.size());
REPORT_INNER_ERROR("E19999", "[%s] At most %zu task was supported, but got %zu, check invalid.",
op_desc_->GetName().c_str(), kNumTaskWithAtomicAddrCleanTask, task_defs_.size());
return INTERNAL_ERROR;
}

@@ -58,10 +58,10 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
task_defs_.size());
return SUCCESS;
} else {
GELOGE(INTERNAL_ERROR,
"[%s] AtomicAddrClean task was expected, but got %zu task_defs",
op_desc_->GetName().c_str(),
task_defs_.size());
GELOGE(INTERNAL_ERROR, "[Check][Size][%s] AtomicAddrClean task was expected:%zu, but got %zu task_defs",
op_desc_->GetName().c_str(), kNumTaskWithAtomicAddrCleanTask, task_defs_.size());
REPORT_INNER_ERROR("E19999", "[%s] AtomicAddrClean task was expected:%zu, but got %zu task_defs,",
op_desc_->GetName().c_str(), kNumTaskWithAtomicAddrCleanTask, task_defs_.size());
return INTERNAL_ERROR;
}
}
@@ -72,7 +72,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
GE_CHECK_NOTNULL(atomic_task);
atomic_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
"[%s] Failed to init task for AtomicAddrClean",
"[Invoke][AtomicAddrCleanOpTask::Init] failed for [%s].",
op_desc_->GetName().c_str());
op_tasks.emplace_back(std::move(atomic_task));
}
@@ -82,7 +82,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
GE_CHECK_NOTNULL(aicore_task);
aicore_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()),
"[%s] Failed to init task for AtomicAddrClean",
"[Invoke][AiCoreOpTask::Init] failed for [%s].",
op_desc_->GetName().c_str());
op_tasks.emplace_back(std::move(aicore_task));



+ 5
- 6
ge/hybrid/node_executor/aicore/aicore_task_compiler.cc View File

@@ -34,7 +34,8 @@ Status AiCoreTaskCompiler::Initialize() {
auto ge_lib = GELib::GetInstance();
GE_CHECK_NOTNULL(ge_lib);
if (!ge_lib->InitFlag()) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge_lib is uninitialized, failed.");
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "[Check][State] failed, because Ge_lib is uninitialized.");
REPORT_INNER_ERROR("E19999", "Initialize failed, because Ge_lib is uninitialized.");
return GE_CLI_GE_NOT_INITIALIZED;
}
auto &kernel_manager = ge_lib->OpsKernelManagerObj();
@@ -49,11 +50,9 @@ Status AiCoreTaskCompiler::DoCompileOp(const NodePtr &node) const {
vector<NodePtr> node_vec;
node_vec.emplace_back(node);
GE_CHK_STATUS_RET(aic_kernel_store_->CompileOpRun(node_vec),
"Failed to execute CompileOp, node = %s",
node->GetName().c_str());
"[Invoke][CompileOpRun] Failed, node = %s", node->GetName().c_str());
GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().CalcOpRunningParam(*node),
"Failed to execute CalcOpRunningParam, node = %s",
node->GetName().c_str());
"[Invoke][CalcOpRunningParam] Failed, node = %s", node->GetName().c_str());
return SUCCESS;
}

@@ -102,7 +101,7 @@ Status AiCoreTaskCompiler::DoGenerateTask(const Node &node,
ret = OpsKernelBuilderManager::Instance().GenerateTask(node, context, tasks);
}

GE_CHK_STATUS(ret, "Failed to execute GenerateTask, node = %s", node.GetName().c_str());
GE_CHK_STATUS(ret, "[Invoke][GenerateTask] Failed, node = %s", node.GetName().c_str());
GE_CHK_RT(rtModelUnbindStream(rt_model_, stream));
GE_CHK_RT(rtModelDestroy(rt_model_));
return ret;


+ 90
- 33
ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc View File

@@ -29,8 +29,9 @@ constexpr int64_t kDimEndFlag = INT64_MIN;
Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
GELOGI("Node[%s] parse ext info start.", node_name_.c_str());
if (ext_info.empty()) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.",
node_name_.c_str());
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Param:ext_info]Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}

@@ -39,7 +40,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
GE_CHECK_NOTNULL(ext_info_);

if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to coy ext info", node_name_.c_str());
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][ext_info_][%s] Failed to copy ext info", node_name_.c_str());
REPORT_CALL_ERROR("E19999", "[%s] Failed to copy ext info.", node_name_.c_str());
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}

@@ -53,22 +55,22 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
switch (aicpu_ext_info->infoType) {
case aicpu::FWKAdapter::FWK_ADPT_EXT_SHAPE_TYPE:
GE_CHK_STATUS_RET(ParseExtShapeType(aicpu_ext_info), "Parse ext shape type failed.");
GE_CHK_STATUS_RET(ParseExtShapeType(aicpu_ext_info), "[Parse][ExtShapeType] failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_INPUT_SHAPE:
GE_CHK_STATUS_RET(ParseExtInputShape(aicpu_ext_info), "Parse ext input shape failed.");
GE_CHK_STATUS_RET(ParseExtInputShape(aicpu_ext_info), "[Parse][ExtInputShape] failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE:
GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed.");
GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "[Parse][ExtOutputShape] failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO:
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed.");
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "[Parse][ExtSessionInfo] failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_BITMAP:
GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "Parse ext bit map failed.");
GE_CHK_STATUS_RET(ParseExtBitMap(aicpu_ext_info), "[Parse][ExtBitMap] failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_UPDATE_ADDR:
GE_CHK_STATUS_RET(ParseExtUpdateAddr(aicpu_ext_info), "Parse ext update_addr failed.");
GE_CHK_STATUS_RET(ParseExtUpdateAddr(aicpu_ext_info), "[Parse][ExtUpdateAddr] failed.");
break;
default:
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.",
@@ -79,33 +81,51 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
offset += aicpu_ext_info->infoLen;
}

GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.",
GE_IF_BOOL_EXEC(offset != ext_info_len_,
REPORT_INNER_ERROR("E19999", "Node[%s] ext_info format error, parse not reach end,"
"offset=%zu, ext_info_len=%zu.", node_name_.c_str(), offset, ext_info_len_);
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]Node[%s] ext_info format error,"
"parse not reach end, offset=%zu, ext_info_len=%zu.",
node_name_.c_str(), offset, ext_info_len_);
return ACL_ERROR_GE_PARAM_INVALID;);
GELOGI("Node[%s] parse ext info end.", node_name_.c_str());
return SUCCESS;
}

// Validates a shape-type ext-info entry.
// Checks that infoLen equals sizeof(int32_t), then reads infoMsg as an int32_t and checks that
// it equals unknown_type_. Either mismatch is reported and logged and yields
// ACL_ERROR_GE_PARAM_INVALID; otherwise SUCCESS is returned.
// NOTE(review): this listing is a rendered diff, so the removed GE_CHK_BOOL_RET_STATUS lines
// appear next to the GE_IF_BOOL_EXEC statements that replace them.
Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(int32_t),
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Size]Node[%s] parse ext shape type failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen);
return ACL_ERROR_GE_PARAM_INVALID;);

// The payload is interpreted as a single int32_t, which the length check above sized for.
auto type = reinterpret_cast<const int32_t *>(aicpu_ext_info->infoMsg);

GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] parse ext shape type failed as need %d but %d.",
GE_IF_BOOL_EXEC(*type != unknown_type_,
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext shape type failed as need %d but %d.",
node_name_.c_str(), unknown_type_, *type);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][Type]Node[%s] parse ext shape type failed as need %d but %d.",
node_name_.c_str(), unknown_type_, *type);
return ACL_ERROR_GE_PARAM_INVALID;);
GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
return SUCCESS;
}

Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) {
auto need_len = input_num_ * sizeof(AicpuShapeAndType);
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] parse ext input shape failed as infoLen must be "
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != need_len,
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext input shape failed as infoLen must be "
"input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType),
aicpu_ext_info->infoLen);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][DataLen]Node[%s] parse ext input shape failed as infoLen must be "
"input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
return ACL_ERROR_GE_PARAM_INVALID;);

auto input = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);

@@ -123,10 +143,16 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
return SUCCESS;
}
auto need_len = output_num_ * sizeof(AicpuShapeAndType);
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] parse ext output shape failed as infoLen must be "
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != need_len,
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext output shape failed as infoLen must be "
"output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType),
aicpu_ext_info->infoLen);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][DataLen]Node[%s] parse ext output shape failed as infoLen must be "
"output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.",
node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen);
return ACL_ERROR_GE_PARAM_INVALID;);

auto output = reinterpret_cast<AicpuShapeAndType *>(aicpu_ext_info->infoMsg);
for (uint32_t index = 0; index < output_num_; ++index) {
@@ -137,9 +163,14 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
}

Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), ACL_ERROR_GE_PARAM_INVALID,
"Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(AicpuSessionInfo),
REPORT_INNER_ERROR("E19999",
"Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen);
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][DataLen]Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen);
return ACL_ERROR_GE_PARAM_INVALID;);

session_info_ = reinterpret_cast<AicpuSessionInfo *>(aicpu_ext_info->infoMsg);
GELOGI("Node[%s] parse session info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
@@ -147,9 +178,14 @@ Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
}

Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint64_t), PARAM_INVALID,
"Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(uint64_t),
REPORT_INNER_ERROR("E19999",
"Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen);
GELOGE(PARAM_INVALID,
"[Check][DataLen]Node[%s] parse bit_map info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(uint64_t), aicpu_ext_info->infoLen);
return PARAM_INVALID;);

bit_map_ = reinterpret_cast<uint64_t *>(aicpu_ext_info->infoMsg);
GELOGI("Node[%s] bit_map info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
@@ -157,9 +193,14 @@ Status AicpuExtInfoHandler::ParseExtBitMap(AicpuExtInfo *aicpu_ext_info) {
}

Status AicpuExtInfoHandler::ParseExtUpdateAddr(AicpuExtInfo *aicpu_ext_info) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(uint32_t), PARAM_INVALID,
"Node[%s] parse update_addr info failed as infoLen must be %zu but %u.",
GE_IF_BOOL_EXEC(aicpu_ext_info->infoLen != sizeof(uint32_t),
REPORT_INNER_ERROR("E19999",
"Node[%s] parse update_addr info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(uint32_t), aicpu_ext_info->infoLen);
GELOGE(PARAM_INVALID,
"[Check][DataLen]Node[%s] parse update_addr info failed as infoLen must be %zu but %u.",
node_name_.c_str(), sizeof(uint32_t), aicpu_ext_info->infoLen);
return PARAM_INVALID;);

update_addr_ = reinterpret_cast<uint32_t *>(aicpu_ext_info->infoMsg);
GELOGI("Node[%s] update_addr info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
@@ -207,15 +248,19 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const
const auto &shape = input_desc.GetShape();

GE_CHK_STATUS_RET(UpdateShapeAndType(shape, input_desc.GetDataType(), input_shape_and_type_[input_index]),
"Node[%s] input[%u] update input shape and type failed.",
"[Update][ShapeAndType] failed, Node[%s] input[%u] .",
node_name_.c_str(), input_index);
return SUCCESS;
}

Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) {
GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), ACL_ERROR_GE_INTERNAL_ERROR,
"Node[%s] is depend compute is no need update output shape and type by ext.",
// Nodes whose unknown_type_ is DEPEND_COMPUTE are rejected here with ACL_ERROR_GE_INTERNAL_ERROR.
// Adjacent string literals are concatenated verbatim; the first fragment ends with a space so
// the reported message reads "output shape and type" rather than "output shapeand type".
GE_IF_BOOL_EXEC((unknown_type_ == DEPEND_COMPUTE),
REPORT_INNER_ERROR("E19999", "Node[%s] is depend compute is no need update output shape "
"and type by ext.", node_name_.c_str());
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Check][Type]Node[%s] is depend compute is no need update output shape and type by ext.",
node_name_.c_str());
return ACL_ERROR_GE_INTERNAL_ERROR;);
GE_CHECK_LE(output_index, output_num_);
auto shape = output_desc.GetShape();

@@ -223,9 +268,13 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons
if (unknown_type_ == DEPEND_SHAPE_RANGE) {
std::vector<std::pair<int64_t, int64_t>> range;
auto range_ret = output_desc.GetShapeRange(range);
GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, ACL_ERROR_GE_INTERNAL_ERROR,
"Node[%s] is shape range type but get GetShapeRange failed, ret=%u.",
GE_IF_BOOL_EXEC(range_ret != GRAPH_SUCCESS,
REPORT_INNER_ERROR("E19999", "Node[%s] is shape range type but get GetShapeRange failed, ret=%u",
node_name_.c_str(), range_ret);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"[Invoke][GetShapeRange]Node[%s] is shape range type but get GetShapeRange failed, ret=%u",
node_name_.c_str(), range_ret);
return ACL_ERROR_GE_INTERNAL_ERROR;);
for (size_t k = 0; k < range.size(); ++k) {
if (shape.GetDim(k) < 0 && k < range.size()) {
GELOGD("Node[%s] output[%u] update dim[%zu] from %ld to range max %ld.",
@@ -239,9 +288,14 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons
}

Status AicpuExtInfoHandler::GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type) {
GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR,
"Node[%s] is depend compute type can not get output shape and type by ext.",
GE_IF_BOOL_EXEC((unknown_type_ == DEPEND_COMPUTE),
REPORT_INNER_ERROR("E19999",
"Node[%s] is depend compute type can not get output shape and type by ext.",
node_name_.c_str());
GELOGE(INTERNAL_ERROR,
"[Check][Type]Node[%s] is depend compute type can not get output shape and type by ext.",
node_name_.c_str());
return INTERNAL_ERROR;);
GetShapeAndType(output_shape_and_type_[output_index], shape, data_type);
return SUCCESS;
}
@@ -254,8 +308,11 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da
AicpuShapeAndType *shape_and_type) {
auto dim_num = shape.GetDimNum();
if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Check][DimNum]Update shape and type failed, as dim_num %zu is over max shape dims %u.",
dim_num, aicpu::FWKAdapter::kMaxShapeDims);
REPORT_INNER_ERROR("E19999", "Update shape and type failed, as dim_num %zu is over max shape dims %u.",
dim_num, aicpu::FWKAdapter::kMaxShapeDims);
return ACL_ERROR_GE_PARAM_INVALID;
}
size_t index = 0;


+ 135
- 79
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -45,7 +45,9 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_
if (kernel_ext_info.empty()) {
if (node_item_->is_dynamic) {
// dynamic node must have ext info
GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
REPORT_INNER_ERROR("E19999", "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
GELOGE(PARAM_INVALID, "[Check][Param:kernel_ext_info]Node[%s] parse ext info failed as ext info is empty.",
node_name_.c_str());
return PARAM_INVALID;
} else {
// if no ext info no need copy to device.
@@ -56,18 +58,19 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_
}

GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info),
"Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.",
"[Invoke][Parse]Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.",
node_name_.c_str(), kernel_ext_info.size());
GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id);
GELOGD("To update aicpu_task ext_info session_info session_id to %ld", session_id);
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id),
"UpdateSessionInfoSessionId failed.");
"[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id);

bool execute_mode = !aicpu_ext_handle_.IsNeedRefreshIOAddr() && !node_item_->is_dynamic;
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateExecuteMode(execute_mode), "UpdateExecuteMode failed.");
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateExecuteMode(execute_mode),
"[Update][ExecuteMode] failed, node:%s.", node_name_.c_str());

// copy task args buf
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_),
"Node[%s] alloc kernel_ext_info buf failed, size=%zu",
"[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu",
node_name_.c_str(), aicpu_ext_handle_.GetExtInfoLen());

// copy default ext info to device
@@ -96,7 +99,7 @@ Status AicpuNodeTaskBase::UpdateOutputShapeFromExtInfo(TaskContext &task_context
DataType data_type;
aicpu_ext_handle_.GetOutputShapeAndType(i, shape, data_type);
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(task_context, shape, i),
"Update node %s [%d]th output shape failed.",
"[Invoke][UpdateShapeToOutputDesc]Update node %s [%d]th output shape failed.",
node_name_.c_str(), i);
}
return SUCCESS;
@@ -123,11 +126,11 @@ Status AicpuNodeTaskBase::UpdateShapeToOutputDesc(TaskContext &task_context,
auto trans_ret = formats::TransShape(format, shape_new.GetDims(),
output_desc->GetDataType(), origin_format, origin_dims_new);
GE_CHK_STATUS_RET(trans_ret,
"Node[%s] out[%d] originFormat[%d] is not same as format[%d], but TransShape failed, shape=%s.",
"[Trans][Shape] failed for Node[%s] out[%d] originFormat[%d] is not same as format[%d], shape=%s.",
node_name_.c_str(), output_index, origin_format, format, shape_new.ToString().c_str());
auto origin_shape_new = GeShape(origin_dims_new);
GE_CHK_STATUS_RET(task_context.GetNodeState()->UpdateOutputShapes(output_index, shape_new, origin_shape_new),
"Node[%s] failed to update update shape, index = %d", node_name_.c_str(), output_index);
"[Update][OutputShapes] failed for Node[%s], index = %d", node_name_.c_str(), output_index);
GELOGD("Node[%s] out[%d] originFormat[%d] is not same as format[%d], need update from %s ro %s.",
node_name_.c_str(), output_index, origin_format, format,
origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str());
@@ -145,8 +148,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() {
auto input_desc = node_item_->MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateInputShapeAndType(i, *input_desc),
"Node[%s] input[%d] update input shape failed.",
node_name_.c_str(), i);
"[Update][InputShapeAndType] failed for Node[%s] input[%d].", node_name_.c_str(), i);
}

if (unknown_type_ != DEPEND_COMPUTE) {
@@ -155,8 +157,7 @@ Status AicpuNodeTaskBase::UpdateExtInfo() {
GE_CHECK_NOTNULL(output_desc);

GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateOutputShapeAndType(j, *output_desc),
"Node[%s] output[%d] UpdateOutputShapeAndType failed.",
node_name_.c_str(), j);
"[Update][OutputShapeAndType] failed for Node[%s] output[%d].", node_name_.c_str(), j);
}
}

@@ -179,13 +180,13 @@ Status AicpuNodeTaskBase::UpdateArgs(TaskContext &context) {
return SUCCESS;
}

GE_CHK_STATUS_RET(UpdateIoAddr(context), "Node[%s] update io addr failed.", node_name_.c_str());
GE_CHK_STATUS_RET(UpdateIoAddr(context), "[Update][IoAddr] failed for Node[%s].", node_name_.c_str());
bool all_shape = false;
const OpDescPtr op_desc = node_item_->GetOpDesc();
(void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
if (node_item_->is_dynamic || all_shape) {
// dynamic node and all_shape kernel need update ext info.
GE_CHK_STATUS_RET(UpdateExtInfo(), "Node[%s] update ext info failed.", node_name_.c_str());
GE_CHK_STATUS_RET(UpdateExtInfo(), "[Update][ExtInfo] failed for Node[%s].", node_name_.c_str());
}

GELOGD("Node[%s] update args end.", node_name_.c_str());
@@ -196,14 +197,15 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AicpuNodeTaskBaseExecuteAsync] Start");
GELOGD("Node[%s] execute async start. unknown_type=%d.", node_name_.c_str(), unknown_type_);

HYBRID_CHK_STATUS_RET(LaunchTask(context), "[%s] Failed to launch task", node_name_.c_str());
HYBRID_CHK_STATUS_RET(LaunchTask(context), "[Launch][Task] failed for [%s].", node_name_.c_str());

// save profiling data
uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); // must be called after Launch kernel
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Get task_id and stream_id failed, ret: 0x%X.", rt_ret);
GELOGE(RT_FAILED, "[Get][TaskIdAndStreamID] failed, ret: 0x%X.", rt_ret);
REPORT_CALL_ERROR("E19999", "rtGetTaskIdAndStreamID failed, ret: 0x%X.", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Aicpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
@@ -241,7 +243,7 @@ Status AicpuTfNodeTask::InitForDependComputeTask() {
constexpr auto result_summary_size = sizeof(aicpu::FWKAdapter::ResultSummary);
for (auto i = 0; i < node_item_->num_outputs; ++i) {
GE_CHK_STATUS_RET(AllocTensorBuffer(result_summary_size, output_summary_[i]),
"Node[%s] alloc buffer for result summary info failed, size=%zu.",
"[Alloc][TensorBuffer] failed for Node[%s] to copy result summary info, size=%zu.",
node_name_.c_str(), result_summary_size);
}
output_summary_host_.resize(node_item_->num_outputs);
@@ -250,21 +252,21 @@ Status AicpuTfNodeTask::InitForDependComputeTask() {
// copy task need copy output_data and output_shape, max len is 2 * output_num
const size_t copy_input_buf_len = node_item_->num_outputs * 2 * sizeof(uint64_t);
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_release_flag_dev_),
"Node[%s] alloc copy task input release_flag failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input release_flag, size=%zu",
node_name_.c_str(), copy_input_buf_len);
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_data_size_dev_),
"Node[%s] alloc copy task input data_size failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input data_size, size=%zu",
node_name_.c_str(), copy_input_buf_len);
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_src_dev_),
"Node[%s] alloc copy task input src failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input src, size=%zu",
node_name_.c_str(), copy_input_buf_len);
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_input_buf_len, copy_input_dst_dev_),
"Node[%s] alloc copy task input dst failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task input dst, size=%zu",
node_name_.c_str(), copy_input_buf_len);

// copy task args buf
GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), copy_task_args_buf_),
"Node[%s] alloc copy task args buf failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task args, size=%zu",
node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL));

std::vector<uint64_t> copy_io_addr;
@@ -278,7 +280,7 @@ Status AicpuTfNodeTask::InitForDependComputeTask() {

// can alloc in init, it can reuse
GE_CHK_STATUS_RET(AllocTensorBuffer(copy_io_addr_size, copy_ioaddr_dev_),
"Node[%s] alloc copy task io buf failed, size=%zu",
"[Alloc][TensorBuffer] failed for Node[%s] to copy task ioaddr, size=%zu",
node_name_.c_str(), copy_io_addr_size);

GE_CHK_RT_RET(rtMemcpy(copy_ioaddr_dev_->GetData(), copy_io_addr_size,
@@ -289,14 +291,17 @@ Status AicpuTfNodeTask::InitForDependComputeTask() {
Status AicpuTfNodeTask::Init(const HybridModel &model) {
GELOGI("Node[%s] init start.", node_name_.c_str());

GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel_ex(), FAILED,
"Node[%s] is tf node but task def does not has kernel ex.",
// Guard: the task def must carry a kernel_ex section; otherwise Init fails with FAILED.
// Adjacent literals are concatenated verbatim, so keep the separating space
// (previously the report read "tf nodebut task def").
GE_IF_BOOL_EXEC(!task_def_.has_kernel_ex(),
    REPORT_INNER_ERROR("E19999", "[Check][TaskDef]Node[%s] is tf node "
                       "but task def does not has kernel ex.", node_name_.c_str());
    GELOGE(FAILED, "[Check][TaskDef]Node[%s] is tf node but task def does not has kernel ex.",
           node_name_.c_str());
    return FAILED;);

auto &kernel_ex_def = task_def_.kernel_ex();
auto kernel_workspace_size = kernel_ex_def.task_info().size();
GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_workspace_size, kernel_workspace_),
"Node[%s] alloc buffer for kernel workspace failed, size=%zu.",
"[Alloc][TensorBuffer] failed for Node[%s] to copy kernel workspace, size=%zu.",
node_name_.c_str(), kernel_workspace_size);

GE_CHK_RT_RET(rtMemcpy(kernel_workspace_->GetData(), kernel_workspace_size,
@@ -306,30 +311,38 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) {
auto input_output_size = (node_item_->num_inputs + node_item_->num_outputs) * sizeof(uint64_t);
// alloc input output addr buf, allow alloc size 0
GE_CHK_STATUS_RET(AllocTensorBuffer(input_output_size, input_output_addr_),
"Node[%s] alloc buffer for io addr failed, size=%zu.",
"[Alloc][TensorBuffer] for Node[%s] to copy io addr, size=%zu.",
node_name_.c_str(), input_output_size);

auto &kernel_ext_info = kernel_ex_def.kernel_ext_info();
auto kernel_ext_info_size = kernel_ex_def.kernel_ext_info_size();
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED,
"Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
// Guard: kernel_ext_info.size() must equal the kernel_ext_info_size recorded in the task def;
// a mismatch fails Init with FAILED.
// The first literal of each pair ends with ", " so the concatenated message keeps the
// original "size=%zu, but ..." wording instead of "size=%zu,but ...".
GE_IF_BOOL_EXEC(kernel_ext_info.size() != kernel_ext_info_size,
    REPORT_INNER_ERROR("E19999", "[Check][Size]Node[%s] task def kernel_ext_info.size=%zu, "
                       "but kernel_ext_info_size=%u.",
                       node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size);
    GELOGE(FAILED, "[Check][Size]Node[%s] task def kernel_ext_info.size=%zu, "
           "but kernel_ext_info_size=%u.",
           node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size);
    return FAILED;);

// init ext info
uint64_t ext_session_id = model.GetSessionId();
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name_.c_str());
GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str());
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "[Init][ExtInfo] failed for Node[%s].",
node_name_.c_str());
GE_CHK_STATUS_RET(InitForDependComputeTask(), "[Init][DependComputeTask] failed for Node[%s].", node_name_.c_str());

// build fwk_op_kernel.
GE_CHK_BOOL_RET_STATUS(sizeof(STR_FWK_OP_KERNEL) >= kernel_ex_def.args_size(), FAILED,
"Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u",
GE_IF_BOOL_EXEC(sizeof(STR_FWK_OP_KERNEL) < kernel_ex_def.args_size(),
REPORT_INNER_ERROR("E19999", "Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u",
node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size());
GELOGE(FAILED, "[Check][Size]Node[%s] sizeof STR_FWK_OP_KERNEL is: %zu, but args_size is: %u",
node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL), kernel_ex_def.args_size());

return FAILED;);
STR_FWK_OP_KERNEL fwk_op_kernel = {0};
errno_t sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL),
kernel_ex_def.args().data(), kernel_ex_def.args_size());
GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR,
"Node[%s] memcpy fwk_op_kernel failed, ret: %d.", node_name_.c_str(), sec_ret);
"[Update][fwk_op_kernel] failed for Node[%s], ret: %d.", node_name_.c_str(), sec_ret);

fwk_op_kernel.fwkKernelBase.fwk_kernel.workspaceBaseAddr = reinterpret_cast<uintptr_t>(kernel_workspace_->GetData());
fwk_op_kernel.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(input_output_addr_->GetData());
@@ -343,12 +356,13 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) {
fwk_op_kernel.fwkKernelBase.fwk_kernel.stepIDAddr = GetStepIdAddr(model);

auto session_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.sessionID;
GE_CHK_STATUS_RET(EnsureSessionCreated(session_id), "Node[%s] create session id %lu failed.",
GE_CHK_STATUS_RET(EnsureSessionCreated(session_id),
"[Invoke][EnsureSessionCreated]Node[%s] create session id %lu failed.",
node_name_.c_str(), session_id);

// alloc kernel_buf_ and copy to device.
GE_CHK_STATUS_RET(AllocTensorBuffer(sizeof(STR_FWK_OP_KERNEL), kernel_buf_),
"Node[%s] alloc buffer for kernel buf failed, size=%zu.",
"[Alloc][TensorBuffer] for Node[%s] to copy kernel_buf, size=%zu.",
node_name_.c_str(), sizeof(STR_FWK_OP_KERNEL));

GE_CHK_RT_RET(rtMemcpy(kernel_buf_->GetData(), sizeof(STR_FWK_OP_KERNEL),
@@ -378,20 +392,23 @@ Status AicpuTfNodeTask::SetMemCopyTask(const domi::TaskDef &task_def) {
GELOGD("Start to set memcpy task for node[%s].", node_name_.c_str());
const domi::KernelExDef &kernel_def = task_def.kernel_ex();
if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) {
GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d",
GELOGE(PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is:%lu, but args_size:%d is bigger",
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
REPORT_INNER_ERROR("E19999", "sizeof STR_FWK_OP_KERNEL is:%lu, but args_size:%d is bigger.",
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size());
return PARAM_INVALID;
}
STR_FWK_OP_KERNEL aicpu_task = {0};
auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL),
kernel_def.args().data(), kernel_def.args_size());
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
GELOGE(FAILED, "[Update][aicpu_task] failed, ret: %d", sec_ret);
REPORT_CALL_ERROR("E19999", "update aicpu_task failed, ret: %d.", sec_ret);
return FAILED;
}

GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_def.task_info_size(), copy_workspace_buf_),
"Node[%s] alloc copy task workspace buf failed, size=%u.",
"[Alloc][TensorBuffer] for Node[%s] to copy task workspace buf, size=%u.",
node_name_.c_str(), kernel_def.task_info_size());

GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_->GetData(), kernel_def.task_info_size(),
@@ -422,7 +439,7 @@ Status AicpuTfNodeTask::EnsureSessionCreated(uint64_t session_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
GE_CHK_STATUS_RET(model_manager->CreateAicpuSession(session_id),
"Create aicpu session %lu failed", session_id);
"[Create][AicpuSession] failed, session_id:%lu", session_id);
return SUCCESS;
}

@@ -437,15 +454,15 @@ Status AicpuTfNodeTask::ReadResultSummaryAndPrepareMemory(TaskContext &context,
auto raw_data_size = result_summary.raw_data_size;
std::unique_ptr<TensorBuffer> tensor_buffer;
GE_CHK_STATUS_RET(AllocTensorBuffer(raw_data_size, tensor_buffer),
"Node[%s] out[%d] alloc tensor buffer failed, raw_data_size=%lu",
"[Alloc][TensorBuffer] failed for Node[%s] out[%d] to copy tensor buffer, raw_data_size:%lu",
node_name_.c_str(), i, raw_data_size);
auto status = context.SetOutput(i, TensorValue(std::shared_ptr<TensorBuffer>(tensor_buffer.release())));
GE_CHK_STATUS_RET(status, "Node[%s] set output %d failed.", node_name_.c_str(), i);
GE_CHK_STATUS_RET(status, "[Set][Output] failed for Node[%s], output:%d.", node_name_.c_str(), i);

auto shape_data_size = result_summary.shape_data_size;
std::unique_ptr<TensorBuffer> shape_buffer;
GE_CHK_STATUS_RET(AllocTensorBuffer(shape_data_size, shape_buffer),
"Node[%s] out[%d] alloc shape buffer failed, shape_data_size=%lu",
"[Alloc][TensorBuffer] failed for Node[%s] out[%d] to copy shape buffer, shape_data_size:%lu",
node_name_.c_str(), i, shape_data_size);
out_shape_hbm.emplace_back(std::move(shape_buffer));
}
@@ -456,7 +473,7 @@ Status AicpuTfNodeTask::CopyDataToHbm(TaskContext &context,
const std::vector<std::unique_ptr<TensorBuffer>> &out_shape_hbm) {
GE_CHK_BOOL_RET_STATUS(out_shape_hbm.size() == static_cast<std::size_t>(node_item_->num_outputs),
INTERNAL_ERROR,
"Node[%s] has %d outputs but out shape is %zu.",
"[Check][Size]Node[%s] has %d outputs but out shape is %zu not equal.",
node_name_.c_str(), node_item_->num_outputs, out_shape_hbm.size());

GE_CHK_STATUS_RET_NOLOG(PrepareCopyInputs(context, out_shape_hbm));
@@ -525,7 +542,7 @@ Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context,
if (result_summary.shape_data_size > 0) {
const auto &shape_hbm = out_shape_hbm[i];
GE_CHK_BOOL_RET_STATUS((result_summary.shape_data_size % sizeof(int64_t) == 0), INTERNAL_ERROR,
"Node[%s] [%d]th output shape data size is %lu is not divided by int64_t.",
"[Check][Size]Node[%s] [%d]th output shape data size is %lu is not divided by int64_t.",
node_name_.c_str(), i, result_summary.shape_data_size);
uint32_t dim_num = result_summary.shape_data_size / sizeof(int64_t);
GELOGD("Node[%s] [%d]th output dim num=%u.", node_name_.c_str(), i, dim_num);
@@ -539,7 +556,7 @@ Status AicpuTfNodeTask::UpdateShapeByHbmBuffer(TaskContext &context,
}
}
GE_CHK_STATUS_RET(UpdateShapeToOutputDesc(context, GeShape(shape_dims), i),
"Node[%s] update [%d]th output shape failed.",
"[Invoke][UpdateShapeToOutputDesc]Node[%s] update [%d]th output shape failed.",
node_name_.c_str(), i);
}
return SUCCESS;
@@ -550,20 +567,20 @@ Status AicpuTfNodeTask::UpdateShapeAndDataByResultSummary(TaskContext &context)

std::vector<std::unique_ptr<TensorBuffer>> out_shape_hbm;
GE_CHK_STATUS_RET(ReadResultSummaryAndPrepareMemory(context, out_shape_hbm),
"Node[%s] read ResultSummary and update output shape failed.",
"[Invoke][ReadResultSummaryAndPrepareMemory] failed for Node[%s].",
node_name_.c_str());

RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(),
"[ReadResultSummaryAndPrepareMemory] End");

GE_CHK_STATUS_RET(CopyDataToHbm(context, out_shape_hbm),
"Node[%s] copy data to output failed.",
"[Invoke][CopyDataToHbm] failed for Node[%s] copy data to output.",
node_name_.c_str());

RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[CopyDataToHbm] End");

GE_CHK_STATUS_RET(UpdateShapeByHbmBuffer(context, out_shape_hbm),
"Node[%s] update shape by hbm buffer failed.",
"[Update][ShapeByHbmBuffer] failed for Node[%s].",
node_name_.c_str());

GELOGD("Node[%s] update shape and data by result summary end.", node_name_.c_str());
@@ -598,7 +615,7 @@ Status AicpuTfNodeTask::UpdateIoAddr(TaskContext &context) {
GELOGD("Node[%s] is depend compute node, use result summary as out addr.", node_name_.c_str());
GE_CHK_BOOL_RET_STATUS(output_summary_.size() == static_cast<std::size_t>(node_item_->num_outputs),
INTERNAL_ERROR,
"Node[%s] has %d output but %zu output summary.",
"[Check][Size]Node[%s] has %d output but %zu output summary not equal.",
node_name_.c_str(), node_item_->num_outputs, output_summary_.size());

for (auto j = 0; j < node_item_->num_outputs; ++j) {
@@ -655,10 +672,11 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
GELOGD("Node[%s] init start.", node_name.c_str());

GE_CHK_BOOL_RET_STATUS(unknown_type_ != DEPEND_COMPUTE, FAILED,
"Node[%s] unknown type[%d] is depend compute, it's not supported now.",
"[Check][Type]Node[%s] unknown type[%d] is depend compute, it's not supported now.",
node_name.c_str(), unknown_type_);

GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel(), FAILED, "Node[%s] task def does not has kernel.", node_name.c_str());
GE_CHK_BOOL_RET_STATUS(task_def_.has_kernel(), FAILED,
"[Check][task_def_]Node[%s] task def does not has kernel.", node_name.c_str());
auto &kernel_def = task_def_.kernel();

auto &args = kernel_def.args();
@@ -671,52 +689,80 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
if (kernel_type == ccKernelType::CUST_AI_CPU) {
bool loaded = false;
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded),
"load cust aicpu so failed.");
"[Load][CustAicpuSo] failed, op:%s, so:%s.", op_desc->GetName().c_str(), so_name.c_str());
if (!loaded) {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(),
"[Launch][CustAicpuSo] failed, node:%s.", node_name_.c_str());
}
}

GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED,
"Node[%s] task def args.size=%zu, but args_size=%u.",
GE_IF_BOOL_EXEC(args.size() != args_size_,
REPORT_INNER_ERROR("E19999", "Node[%s] task def args.size=%zu, but args_size=%u not equal.",
node_name.c_str(), args.size(), args_size_);
GELOGE(FAILED, "[Check][Size]Node[%s] task def args.size=%zu, but args_size=%u not equal.",
node_name.c_str(), args.size(), args_size_);

GE_CHK_BOOL_RET_STATUS(args_size_ >= sizeof(aicpu::AicpuParamHead), FAILED,
"Node[%s] task def args_size=%u is less than aicpu param head len=%zu.",
return FAILED;);

GE_IF_BOOL_EXEC(args_size_ < sizeof(aicpu::AicpuParamHead),
REPORT_INNER_ERROR("E19999",
"Node[%s] task def args_size=%u is less than aicpu param head len=%zu.",
node_name.c_str(), args_size_, sizeof(aicpu::AicpuParamHead));
GELOGE(FAILED,
"[Check][Size]Node[%s] task def args_size=%u is less than aicpu param head len=%zu.",
node_name.c_str(), args_size_, sizeof(aicpu::AicpuParamHead));
return FAILED;);

args_.reset(new(std::nothrow) uint8_t[args_size_]());
GE_CHK_BOOL_RET_STATUS(args_ != nullptr, FAILED,
"Node[%s] malloc args mem failed, args_size_=%u.",
GE_IF_BOOL_EXEC(args_ == nullptr,
REPORT_INNER_ERROR("E19999", "new memory failed for Node[%s], args_size_=%u.",
node_name.c_str(), args_size_);
GELOGE(FAILED, "[Malloc][Memory] failed for Node[%s], args_size_=%u.",
node_name.c_str(), args_size_);
return FAILED;);

errno_t sec_ret = memcpy_s(args_.get(), args_size_, args.c_str(), args.size());
GE_CHK_BOOL_RET_STATUS(sec_ret == EOK, INTERNAL_ERROR,
"Node[%s] copy args failed, ret: %d", node_name_.c_str(), sec_ret);
// Guard: copying the task-def args into args_ must succeed.
// On failure return INTERNAL_ERROR, the same status that is logged and that the replaced
// GE_CHK_BOOL_RET_STATUS call returned. Returning sec_ret would leak a raw errno_t value
// to callers that expect a GE Status code.
GE_IF_BOOL_EXEC(sec_ret != EOK,
    REPORT_INNER_ERROR("E19999",
                       "memcpy_s args_ failed for Node[%s], ret: %d", node_name_.c_str(), sec_ret);
    GELOGE(INTERNAL_ERROR,
           "[Update][args] failed for Node[%s], ret: %d", node_name_.c_str(), sec_ret);
    return INTERNAL_ERROR;);

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_.get());
auto io_num = node_item_->num_inputs + node_item_->num_outputs;

// check AicpuParamHead ioAddrNum is right.
GE_CHK_BOOL_RET_STATUS((aicpu_param_head->ioAddrNum == static_cast<uint32_t>(io_num)), PARAM_INVALID,
"Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.",
GE_IF_BOOL_EXEC((aicpu_param_head->ioAddrNum != static_cast<uint32_t>(io_num)),
REPORT_INNER_ERROR("E19999",
"Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.",
node_name.c_str(), aicpu_param_head->ioAddrNum,
node_item_->num_inputs, node_item_->num_outputs);
GELOGE(PARAM_INVALID,
"[Check][IoAddrNum]Node[%s] param head ioAddrNum=%u, but node has %d inputs and %d outputs.",
node_name.c_str(), aicpu_param_head->ioAddrNum,
node_item_->num_inputs, node_item_->num_outputs);
return PARAM_INVALID;);

auto mini_len = sizeof(aicpu::AicpuParamHead) + io_num * sizeof(uint64_t);
// check args len must over mini len.
GE_CHK_BOOL_RET_STATUS((mini_len <= aicpu_param_head->length), PARAM_INVALID,
"Node[%s] param head length=%u, but min len need %zu.",
"[Check][DataLen]Node[%s] param head length=%u, but min len need %zu.",
node_name.c_str(), aicpu_param_head->length, mini_len);

auto &kernel_ext_info = kernel_def.kernel_ext_info();
auto kernel_ext_info_size = kernel_def.kernel_ext_info_size();
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED,
"Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
GE_IF_BOOL_EXEC(kernel_ext_info.size() != kernel_ext_info_size,
REPORT_INNER_ERROR("E19999",
"Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size);
GELOGE(FAILED,
"[Check][Size]Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u",
node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size);
return FAILED;);

uint64_t ext_session_id = model.GetSessionId();
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name.c_str());
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id),
"[Init][ExtInfo] failed for Node[%s].", node_name.c_str());

if (ext_info_addr_dev_ == nullptr) {
aicpu_param_head->extInfoLength = 0;
@@ -754,9 +800,14 @@ Status AicpuNodeTask::UpdateIoAddr(TaskContext &context) {
// if has input and output, need copy to ioaddr
int cpy_ret = memcpy_s(io_addr, args_size_ - sizeof(aicpu::AicpuParamHead),
&io_addrs[0], sizeof(uint64_t) * io_addrs.size());
GE_CHK_BOOL_RET_STATUS(cpy_ret == 0, INTERNAL_ERROR,
"Node[%s] memcpy io addr to AicpuParamHead failed, ret=%d, args_size=%u, io nums=%zu.",
// Guard: copying the collected io addresses into the args buffer must succeed;
// any non-zero memcpy_s result fails the update with INTERNAL_ERROR.
// The first literal of each pair ends with ", " so the concatenated message reads
// "failed, ret=%d" as before the refactor, not "failed,ret=%d".
GE_IF_BOOL_EXEC(cpy_ret != 0,
    REPORT_INNER_ERROR("E19999", "Node[%s] memcpy io addr to AicpuParamHead failed, "
                       "ret=%d, args_size=%u, io nums=%zu.",
                       node_name_.c_str(), cpy_ret, args_size_, io_addrs.size());
    GELOGE(INTERNAL_ERROR, "[Update][io_addr]Node[%s] memcpy io addr to AicpuParamHead failed, "
           "ret=%d, args_size=%u, io nums=%zu.",
           node_name_.c_str(), cpy_ret, args_size_, io_addrs.size());
    return INTERNAL_ERROR;);
return SUCCESS;
}

@@ -815,12 +866,12 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model,
auto task_defs = model.GetTaskDefs(node);
GE_CHECK_NOTNULL(task_defs);
if (node_item->shape_inference_type != DEPEND_COMPUTE) {
GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID,
"Node[%s] task_def num[%zu] != 1", node->GetName().c_str(), (*task_defs).size());
GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 1, PARAM_INVALID, "[Check][Size]Node[%s] task_def num[%zu] != 1",
node->GetName().c_str(), (*task_defs).size());
} else {
// The number of tasks of the fourth type operator must be 2
GE_CHK_BOOL_RET_STATUS((*task_defs).size() == 2, PARAM_INVALID,
"Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2",
"[Check][Size]Node[%s] DEPEND_COMPUTE task_def num[%zu] != 2",
node->GetName().c_str(), (*task_defs).size());
}
const auto &task_def = (*task_defs)[0];
@@ -832,15 +883,20 @@ Status AiCpuNodeExecutor::LoadTask(const HybridModel &model,
GELOGI("Node[%s] task type=%u is AicpuNodeTask.", node->GetName().c_str(), task_def.type());
aicpu_task = MakeShared<AicpuNodeTask>(node_item, task_def);
} else {
GELOGE(UNSUPPORTED, "Node[%s] task type=%u is not supported by aicpu node executor.",
// Log and report an unsupported task type. The message names the two task types that are
// accepted; the first literal of each pair ends with ", " so the concatenated text does not
// run "executor," straight into "RT_MODEL_TASK_KERNEL_EX".
GELOGE(UNSUPPORTED, "[Check][Type]Node[%s] task type=%u is not supported by aicpu node executor, "
       "RT_MODEL_TASK_KERNEL_EX or RT_MODEL_TASK_KERNEL is supported.",
       node->GetName().c_str(), task_def.type());
REPORT_INNER_ERROR("E19999", "Node[%s] task type=%u is not supported by aicpu node executor, "
                   "RT_MODEL_TASK_KERNEL_EX or RT_MODEL_TASK_KERNEL is supported.",
                   node->GetName().c_str(), task_def.type());
return UNSUPPORTED;
}

GE_CHK_BOOL_RET_STATUS(aicpu_task != nullptr, MEMALLOC_FAILED,
"Load task for node %s failed.", node->GetName().c_str());
"[Check][State]Load task for node %s failed.", node->GetName().c_str());

GE_CHK_STATUS_RET(aicpu_task->Init(model), "Node[%s] task init failed.", node->GetName().c_str());
GE_CHK_STATUS_RET(aicpu_task->Init(model),
"[Init][AicpuNodeTaskBase] failed for Node[%s].", node->GetName().c_str());

task = std::move(aicpu_task);
GELOGD("Node[%s] load task end.", node->GetName().c_str());


+ 43
- 41
ge/ir_build/atc_ir_common.cc View File

@@ -206,8 +206,8 @@ bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_m
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"},
{"--input_shape's dim", std::to_string(shapes.size()), "Dim num must within [1, 4] when set dynamic_dims"});
GELOGE(ge::PARAM_INVALID, "[Check][DynamicDimsInputShape]Dim num must within [%zu, %zu] when set dynamic_dims.",
kMinNDDimNum, kMaxNDDimNum);
GELOGE(ge::PARAM_INVALID, "[Check][DynamicDimsInputShape]Dim num must within [%zu, %zu] when set dynamic_dims.",
kMinNDDimNum, kMaxNDDimNum);
return false;
}
dynamic_dim += std::count(shapes.begin(), shapes.end(), kDynamicInputDim);
@@ -216,8 +216,9 @@ bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_m
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"},
{"--input_shape's dynamic dim num", "0", "at least one dim should be -1 when set dynamic_dims"});
GELOGE(ge::PARAM_INVALID,
"[Check][DynamicDimsInputShape]--input_shape invalid, at least one dim should be -1 when set dynamic_dims.");
// Log the rejected --input_shape. The split literal keeps the space after the comma so the
// message matches the pre-split wording ("invalid, at least ...").
GELOGE(ge::PARAM_INVALID,
       "[Check][DynamicDimsInputShape]--input_shape invalid, "
       "at least one dim should be -1 when set dynamic_dims.");
return false;
}

@@ -352,8 +353,8 @@ bool ParseSingleShapeRange(std::string &shape_range, vector<pair<int64_t, int64_
} else {
ErrorManager::GetInstance().ATCReportErrMessage("E10048", {"shape_range", "reason", "sample"},
{shape_range, kInputShapeRangeInvalid, kInputShapeRangeSample3});
GELOGE(PARAM_INVALID,"[Parse][Parameter]shape_range:%s invalid, reason: %s, correct sample is %s.",
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3);
GELOGE(PARAM_INVALID, "[Parse][Parameter]shape_range:%s invalid, reason: %s, correct sample is %s.",
shape_range.c_str(), kInputShapeRangeInvalid, kInputShapeRangeSample3);
return false;
}
shape_range_vec.emplace_back(range_pair);
@@ -392,19 +393,18 @@ bool ParseInputShapeRange(const std::string &shape_range,
}
shape_range_map.emplace(make_pair(StringUtils::Trim(shape_range_pair_vec[0]), shape_range_val));
}
return true;
}

Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_image_size, string &dynamic_dims,
const string input_shape, const string input_shape_range, const string input_format,bool &is_dynamic_input){
const string input_shape, const string input_shape_range, const string input_format, bool &is_dynamic_input) {
int32_t param_size = static_cast<int32_t>(!dynamic_batch_size.empty()) +
static_cast<int32_t>(!dynamic_image_size.empty()) + static_cast<int32_t>(!dynamic_dims.empty());
if (param_size > 1) {
ErrorManager::GetInstance().ATCReportErrMessage("E10009", {"parameter0", "parameter1", "parameter2"},
{"dynamic_batch_size", "dynamic_image_size", "dynamic_dims"});
GELOGE(ge::PARAM_INVALID,
"[Parse][Parameter]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one");
GELOGE(ge::PARAM_INVALID,
"[Parse][Parameter]dynamic_batch_size, dynamic_image_size and dynamic_dims can only be set one");
return ge::PARAM_INVALID;
}

@@ -424,8 +424,8 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
is_dynamic_input = true;
if (input_shape.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {"input_shape"});
GELOGE(ge::PARAM_INVALID,
"[Check][Parameter:input_shape]The input_shape can not be empty in dynamic input size scenario.");
GELOGE(ge::PARAM_INVALID,
"[Check][Parameter:input_shape]The input_shape can not be empty in dynamic input size scenario.");
return ge::PARAM_INVALID;
}

@@ -443,8 +443,8 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i

if (!dynamic_image_size.empty()) {
if (!CheckDynamicImagesizeInputShapeValid(shape_map, input_format, dynamic_image_size)) {
GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape] %s invalid. dynamic_image_size:%s ",
input_shape.c_str(), dynamic_image_size.c_str());
GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape] %s invalid. dynamic_image_size:%s ",
input_shape.c_str(), dynamic_image_size.c_str());
return ge::PARAM_INVALID;
}
}
@@ -452,7 +452,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i
if (!dynamic_dims.empty()) {
if (!CheckDynamicDimsInputShapeValid(shape_map, input_format, dynamic_dims)) {
GELOGE(ge::PARAM_INVALID, "[Check][DynamicDimsInputShape]: %s of input shape: %s failed.", dynamic_dims.c_str(),
input_shape.c_str());
input_shape.c_str());
return ge::PARAM_INVALID;
}
}
@@ -504,7 +504,7 @@ bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &sh
ErrorManager::GetInstance().ATCReportErrMessage("E10002", {"shape", "reason", "sample"},
{shape, kDigitError, kInputShapeSample2});
GELOGE(PARAM_INVALID, "[Check][Param]--input_shape's shape value[%s] is not digit",
shape_value_str.c_str());
shape_value_str.c_str());
return false;
}
}
@@ -547,10 +547,10 @@ bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &sh

Status CheckOutputTypeParamValid(const std::string output_type) {
if ((!output_type.empty()) && (kOutputTypeSupportDatatype.find(output_type) == kOutputTypeSupportDatatype.end())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--output_type", output_type, kOutputTypeSupport});
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--output_type", output_type, kOutputTypeSupport});
GELOGE(ge::PARAM_INVALID,
"[Check][Param]Invalid value for --output_type[%s], %s.", output_type.c_str(), kOutputTypeSupport);
"[Check][Param]Invalid value for --output_type[%s], %s.", output_type.c_str(), kOutputTypeSupport);
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
@@ -559,10 +559,10 @@ Status CheckOutputTypeParamValid(const std::string output_type) {
Status CheckBufferOptimizeParamValid(const std::string buffer_optimize) {
if ((!buffer_optimize.empty()) &&
(kBufferOptimizeSupportOption.find(buffer_optimize) == kBufferOptimizeSupportOption.end())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--buffer_optimize", buffer_optimize, kBufferOptimizeSupport});
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--buffer_optimize", buffer_optimize, kBufferOptimizeSupport});
GELOGE(ge::PARAM_INVALID,
"[Check][BufferOptimize]Invalid value for [%s], %s.", buffer_optimize.c_str(), kBufferOptimizeSupport);
"[Check][BufferOptimize]Invalid value for [%s], %s.", buffer_optimize.c_str(), kBufferOptimizeSupport);
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
@@ -573,23 +573,23 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight,
if ((!compress_weight_conf.empty()) &&
(!CheckInputPathValid(compress_weight_conf, "--compress_weight_conf"))) {
GELOGE(ge::PARAM_INVALID, "[Check][InputPath]compress weight config file not found, file_name:%s",
compress_weight_conf.c_str());
compress_weight_conf.c_str());
return ge::PARAM_INVALID;
}
if ((enable_compress_weight != "") && (enable_compress_weight != "true") && (enable_compress_weight != "false")) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10005", {"parameter", "value"}, {"enable_compress_weight", enable_compress_weight});
GELOGE(ge::PARAM_INVALID,
"[Check][Param:enable_compress_weight]Input parameter[--enable_compress_weight]'s value:%s must be true or false.",
enable_compress_weight.c_str());
ErrorManager::GetInstance().ATCReportErrMessage("E10005", {"parameter", "value"},
{"enable_compress_weight", enable_compress_weight});
GELOGE(ge::PARAM_INVALID, "[Check][Param:enable_compress_weight]"
"Input parameter[--enable_compress_weight]'s value:%s must be true or false.",
enable_compress_weight.c_str());
return ge::PARAM_INVALID;
}

if ((enable_compress_weight == "true") && (!compress_weight_conf.empty())) {
ErrorManager::GetInstance().ATCReportErrMessage("E10047", {"parameter0", "parameter1"},
{"enable_compress_weight", "compress_weight_conf"});
GELOGE(ge::PARAM_INVALID,
"[Check][CompressWeight]enable_compress_weight and compress_weight_conf can not both exist!!");
GELOGE(ge::PARAM_INVALID,
"[Check][CompressWeight]enable_compress_weight and compress_weight_conf can not both exist!!");
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
@@ -597,8 +597,8 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight,

Status CheckKeepTypeParamValid(const std::string &keep_dtype) {
if ((!keep_dtype.empty()) && (!CheckInputPathValid(keep_dtype, "--keep_dtype"))) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--keep_dtype", keep_dtype, kKeepDtypeError});
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--keep_dtype", keep_dtype, kKeepDtypeError});
GELOGE(ge::PARAM_INVALID, "[Check][InputPath::--keep_dtype] file not found, file_name:%s", keep_dtype.c_str());
return ge::PARAM_INVALID;
}
@@ -622,12 +622,12 @@ int CheckLogParamValidAndSetLogLevel(const std::string log) {
ret = dlog_setlevel(-1, DLOG_ERROR, 1);
} else {
GELOGE(ge::PARAM_INVALID,
"[Check][LogParam]log:%s invalid, only support debug, info, warning, error, null", log.c_str());
"[Check][LogParam]log:%s invalid, only support debug, info, warning, error, null", log.c_str());
REPORT_INPUT_ERROR("E10417", std::vector<std::string>({"loglevel"}), std::vector<std::string>({log}));
return ret;
}
if (ret != 0) {
GELOGE(ge::PARAM_INVALID, "[Set][LogLevel] fail, level:%s.",log.c_str());
GELOGE(ge::PARAM_INVALID, "[Set][LogLevel] fail, level:%s.", log.c_str());
REPORT_INPUT_ERROR("E10417", std::vector<std::string>({"loglevel"}), std::vector<std::string>({log}));

}
@@ -654,10 +654,10 @@ Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory)

Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream) {
if ((enable_single_stream != "") && (enable_single_stream != "true") && (enable_single_stream != "false")) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10005", {"parameter", "value"}, {"enable_single_stream", enable_single_stream});
ErrorManager::GetInstance().ATCReportErrMessage("E10005", {"parameter", "value"},
{"enable_single_stream", enable_single_stream});
GELOGE(ge::PARAM_INVALID, "[Check][Param:--enable_single_stream] value:%s must be true or false.",
enable_single_stream.c_str());
enable_single_stream.c_str());
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
@@ -667,9 +667,10 @@ Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::
// only appointed op_select_implmode, can user appoint optypelist_for_implmode
if (optypelist_for_implmode != "" && op_select_implmode == "") {
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--op_select_implmode", op_select_implmode.c_str(), kCompressWeightError});
{"--op_select_implmode", op_select_implmode.c_str(),
kCompressWeightError});
GELOGE(ge::PARAM_INVALID, "[Check][Param:--op_select_implmode]value:%s invalid, %s.",
op_select_implmode.c_str(),kCompressWeightError);
op_select_implmode.c_str(), kCompressWeightError);
return ge::PARAM_INVALID;
}
// op_select_implmode default value is high_performance
@@ -679,9 +680,10 @@ Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::
if (op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_DEFAULT &&
op_select_implmode != IR_OPTION_OP_SELECT_IMPLMODE_PRECISON) {
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"},
{"--op_select_implmode", op_select_implmode.c_str(), kSelectImplmodeError});
{"--op_select_implmode", op_select_implmode.c_str(),
kSelectImplmodeError});
GELOGE(ge::PARAM_INVALID, "[Check][Implmode]Invalid value for --op_select_implmode[%s], %s.",
op_select_implmode.c_str(), kSelectImplmodeError);
op_select_implmode.c_str(), kSelectImplmodeError);
return ge::PARAM_INVALID;
}
}


+ 3
- 2
ge/ir_build/ge_ir_build.cc View File

@@ -562,7 +562,8 @@ graphStatus Impl::InitDomiOmgContext(const string &input_shape, const string &in
if (iter != ge::input_format_str_to_geformat.end()) {
omg_context_.format = iter->second;
} else {
GELOGE(GRAPH_PARAM_INVALID, "[Check][Param:InputForamt] %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.",
GELOGE(GRAPH_PARAM_INVALID,
"[Check][Param:InputForamt] %s not support , expect ND/NCHW/NHWC/CHWN/NC1HWC0/NHWC1C0.",
input_format.c_str());
return GRAPH_PARAM_INVALID;
}
@@ -573,7 +574,7 @@ graphStatus Impl::InitDomiOmgContext(const string &input_shape, const string &in
}

if (!ParseInputShape(input_shape, omg_context_.input_dims, omg_context_.user_input_dims, is_dynamic_input)) {
GELOGE(GRAPH_PARAM_INVALID, "[Parse][InputShape:ImputShape] Failed, shape: %s", input_shape.c_str());
GELOGE(GRAPH_PARAM_INVALID, "[Parse][InputShape:input_shape] Failed, shape: %s", input_shape.c_str());
return GRAPH_PARAM_INVALID;
}
return GRAPH_SUCCESS;


+ 3
- 3
ge/omm/csa_interact.cc View File

@@ -108,10 +108,10 @@ Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state,

content = content_json.dump();
} catch (const nlohmann::json::exception &e) {
GELOGE(INTERNAL_ERROR, "[Create][JsonObject] exception:%s job_state:%u job_sub_state:%u.",
e.what(), job_state,job_sub_state);
GELOGE(INTERNAL_ERROR, "[Create][JsonObject] exception:%s job_state:%u job_sub_state:%u.",
e.what(), job_state, job_sub_state);
REPORT_INNER_ERROR("E19999", "Create json object failed. exception:%s job_state:%u job_sub_state:%u.",
e.what(), job_state,job_sub_state);
e.what(), job_state, job_sub_state);
return INTERNAL_ERROR;
}



+ 9
- 8
ge/opskernel_manager/ops_kernel_builder_manager.cc View File

@@ -101,7 +101,7 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n
}

Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string,
std::string> &options, std::string &lib_paths) {
std::string> &options, std::string &lib_paths) {
GELOGD("Start to execute GetLibPaths");
std::string path_base = PluginManager::GetPath();
std::string so_path = "plugin/opskernel/";
@@ -129,11 +129,11 @@ Status OpsKernelBuilderManager::CalcOpRunningParam(Node &node) const {
const std::string &lib_name = op_desc->GetOpKernelLibName();
auto it = ops_kernel_builders_.find(lib_name);
if (it == ops_kernel_builders_.end()) {
GELOGE(INTERNAL_ERROR,"[Find][LibName] fail for libName = %s, node = %s.",
lib_name.c_str(), op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999",
"find LibName for CalcOpRunningParam failed, libName = %s, node = %s not exist.",
lib_name.c_str(), op_desc->GetName().c_str());
GELOGE(INTERNAL_ERROR,"[Find][LibName] fail for libName = %s, node = %s.",
lib_name.c_str(), op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999",
"find LibName for CalcOpRunningParam failed, libName = %s, node = %s not exist.",
lib_name.c_str(), op_desc->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -152,9 +152,10 @@ Status OpsKernelBuilderManager::GenerateTask(const Node &node,
const std::string &lib_name = op_desc->GetOpKernelLibName();
auto it = ops_kernel_builders_.find(lib_name);
if (it == ops_kernel_builders_.end()) {
GELOGE(INTERNAL_ERROR, "[Find][LibName]fail for libName = %s, node:%s", lib_name.c_str(), op_desc->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Find][LibName]fail for libName = %s, node:%s", lib_name.c_str(),
op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999", "find LibName for GenerateTask failed, libName = %s, node = %s not exist",
lib_name.c_str(), op_desc->GetName().c_str());
lib_name.c_str(), op_desc->GetName().c_str());
return INTERNAL_ERROR;
}



+ 22
- 22
ge/opskernel_manager/ops_kernel_manager.cc View File

@@ -180,35 +180,35 @@ Status OpsKernelManager::ParsePluginOptions(const map<string, string> &options,
} else if (flag == 1) {
enable_flag = true;
} else {
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, option_key:%s, "
"its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str());
"its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
} catch (std::invalid_argument &) {
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions] failed, option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
iter->second.c_str());
REPORT_INNER_ERROR("E19999",
"ParsePluginOptions failed, option_key:ge.feFlag, its value %s is invalid_argument, it must be 0 or 1.",
iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID, "[Parse][PluginOptions] failed, option_key:ge.feFlag,"
"its value %s is invalid_argument, it must be 0 or 1.",
iter->second.c_str());
REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, option_key:ge.feFlag,"
"its value %s is invalid_argument, it must be 0 or 1.",
iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (std::out_of_range &) {
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]failed, option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
iter->second.c_str());
REPORT_INNER_ERROR("E19999",
"ParsePluginOptions failed, option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]failed, option_key:ge.feFlag, its value %s is out of range, it must be 0 or 1.",
iter->second.c_str());
REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, option_key:ge.feFlag,"
"its value %s is out of range, it must be 0 or 1.",
iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
} catch (...) {
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
GELOGE(GE_GRAPH_OPTIONS_INVALID,
"[Parse][PluginOptions]option_key:%s, its value %s is invalid, it must be 0 or 1.",
plugin_name.c_str(), iter->second.c_str());
REPORT_INNER_ERROR("E19999", "ParsePluginOptions failed, option_key:%s, "
"its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str());
"its value %s is invalid, it must be 0 or 1.", plugin_name.c_str(), iter->second.c_str());
return GE_GRAPH_OPTIONS_INVALID;
}
} else {
@@ -243,8 +243,8 @@ Status OpsKernelManager::InitOpKernelInfoStores(const map<string, string> &optio
GELOGI("OpKernelInfoStore name: %s.", (it.first).c_str());
Status ret = it.second->Initialize(options);
if (ret != SUCCESS) {
GELOGE(GE_OPS_KERNEL_STORE_INIT_FAILED,
"[Init][OpKernelLib]OpKernelInfoStore: %s initialize failed.", (it.first).c_str());
GELOGE(GE_OPS_KERNEL_STORE_INIT_FAILED,
"[Init][OpKernelLib]OpKernelInfoStore: %s initialize failed.", (it.first).c_str());
REPORT_CALL_ERROR("E19999", "OpKernelInfoStore: %s initialize failed.", (it.first).c_str());
return GE_OPS_KERNEL_STORE_INIT_FAILED;
}


+ 39
- 34
ge/session/inner_session.cc View File

@@ -179,9 +179,9 @@ Status InnerSession::AddGraph(uint32_t graph_id, const Graph &graph,
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[Add][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
session_id_, graph_id);
REPORT_INNER_ERROR("E19999", "AddGraph failed because GraphManager not init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(options);
@@ -225,10 +225,10 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
if (mutex_.try_lock()) {
std::lock_guard<std::mutex> lock(mutex_, std::adopt_lock);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999", "RunGraph failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
GELOGE(GE_SESS_INIT_FAILED, "[Run][Graph]failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999", "RunGraph failed because GraphManager not Init, InnerSession:%lu, graph_id:%u.",
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(graph_id);
@@ -255,8 +255,9 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
return SUCCESS;
} else {
GELOGE(GE_SESS_ALREADY_RUNNING, "[Run][Graph]failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph failed because mutex try_lock false, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph failed because mutex try_lock false, InnerSession:%lu, graph_id=%u.",
session_id_, graph_id);
return GE_SESS_ALREADY_RUNNING;
}
}
@@ -264,18 +265,20 @@ Status InnerSession::RunGraph(uint32_t graph_id, const std::vector<Tensor> &inpu
Status InnerSession::RemoveGraph(uint32_t graph_id) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED,
"[Remove][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph failed, because GraphManager not init, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
GELOGE(GE_SESS_INIT_FAILED,
"[Remove][Graph] failed because GraphManager not init, InnerSession:%lu, graph_id=%u.",
session_id_, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph failed, because GraphManager not init, InnerSession:%lu, graph_id=%u.",
session_id_, graph_id);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(graph_id);
Status ret = graph_manager_.RemoveGraph(graph_id);
if (ret != SUCCESS) {
GELOGE(ret, "[Remove][Graph] failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RemoveGraph failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RemoveGraph failed, InnerSession:%lu, graph_id=%u.", session_id_, graph_id);
return ret;
}

@@ -288,18 +291,19 @@ Status InnerSession::RegisterCallBackFunc(
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc] failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not init, InnerSession:%lu.", session_id_);
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc] failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not init, InnerSession:%lu.", session_id_);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(std::map<std::string, std::string>{});
Status ret = graph_manager_.RegisterCallBackFunc(key, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[Register][CallBackFunc] failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.",
session_id_, key.c_str());
return ret;
}

@@ -312,18 +316,20 @@ Status InnerSession::RegisterCallBackFunc(
const std::function<Status(uint32_t, const std::map<AscendString, ge::Tensor> &)> &callback) {
std::lock_guard<std::mutex> lock(resource_mutex_);
if (!init_flag_) {
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc]failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
GELOGE(GE_SESS_INIT_FAILED,
"[Register][CallBackFunc]failed because GraphManager not initialize, InnerSession:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc failed because GraphManager not initialize, InnerSession:%lu.",
session_id_);
return GE_SESS_INIT_FAILED;
}
UpdateThreadContext(std::map<std::string, std::string>{});
Status ret = graph_manager_.RegisterCallBackFunc(key, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[Register][CallBackFunc] failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.", session_id_, key.c_str());
REPORT_CALL_ERROR("E19999",
"GraphManager RegisterCallBackFunc failed, InnerSession:%lu register %s.",
session_id_, key.c_str());
return ret;
}

@@ -349,8 +355,8 @@ Status InnerSession::BuildGraph(uint32_t graph_id, const std::vector<InputTensor
Status ret = graph_manager_.BuildGraph(graph_id, ge_inputs, ge_root_model, session_id_, true);
if (ret != SUCCESS) {
GELOGE(ret, "[Build][Graph] failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager BuildGraph failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager BuildGraph failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
return ret;
}
GELOGI("[InnerSession:%lu] build graph success, graph_id=%u.", session_id_, graph_id);
@@ -364,8 +370,8 @@ Status InnerSession::RunGraphAsync(uint32_t graph_id, const std::vector<InputTen
Status ret = graph_manager_.RunGraphAsync(graph_id, inputs, session_id_, callback);
if (ret != SUCCESS) {
GELOGE(ret, "[Run][GraphAsync]failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RunGraphAsync failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
REPORT_CALL_ERROR("E19999",
"GraphManager RunGraphAsync failed, InnerSession:%lu graph_id=%u.", session_id_, graph_id);
return ret;
}
GELOGI("[InnerSession:%lu] run graph success, graph_id=%u.", session_id_, graph_id);
@@ -427,11 +433,10 @@ Status InnerSession::AddDumpProperties(const DumpProperties &dump_properties) {
Status InnerSession::RemoveDumpProperties() {
DumpManager::GetInstance().RemoveDumpProperties(session_id_);
if (is_dump_server_inited_ && DumpManager::GetInstance().GetDumpPropertiesMap().empty()) {
GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus,
GE_IF_BOOL_EXEC(AdxDataDumpServerUnInit() != kDumpStatus,
GELOGE(PARAM_INVALID, "[UnInit][AdxDataDumpServer] failed, session_id:%lu.", session_id_);
REPORT_INNER_ERROR("E19999",
"RemoveDumpProperties failed because AdxDataDumpServerUnInit failed, session_id:%lu.",
session_id_);
REPORT_INNER_ERROR("E19999", "RemoveDumpProperties failed because AdxDataDumpServerUnInit failed,"
"session_id:%lu", session_id_);
return PARAM_INVALID)
GELOGI("UnInit adx data dump server success");
is_dump_server_inited_ = false;


+ 70
- 69
ge/session/session_manager.cc View File

@@ -93,10 +93,10 @@ Status SessionManager::CreateSession(const std::map<std::string, std::string> &o

Status SessionManager::DestroySession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Destroy][Session]fail for Session manager is not initialized, session_id:%lu.",
session_id);
REPORT_INNER_ERROR("E19999",
"DestroySession fail for Session manager is not initialized, session_id:%lu.", session_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Destroy][Session]fail for Session manager is not initialized, session_id:%lu.", session_id);
REPORT_INNER_ERROR("E19999", "DestroySession fail for Session manager is not initialized, session_id:%lu.",
session_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
std::lock_guard<std::mutex> lock(mutex_);
@@ -123,12 +123,12 @@ Status SessionManager::DestroySession(SessionId session_id) {

Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variable]fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
REPORT_INNER_ERROR("E19999",
"GetVariable fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variable]fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
REPORT_INNER_ERROR("E19999",
"GetVariable fail for Session manager is not initialized, session_id:%lu, input_name:%s.",
session_id, name.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -152,10 +152,11 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999", "AddGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -185,12 +186,12 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][GraphWithCopy]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraphWithCopy fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Add][GraphWithCopy]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"AddGraphWithCopy fail for Session manager is not initialized, session_id:%lu, graph_id:%u",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -220,10 +221,12 @@ Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id,
Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Run][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Run][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -241,12 +244,12 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s

Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Remove][Graph]fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Remove][Graph]fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RemoveGraph fail for Session manager is not initialized, session_id:%lu graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -264,10 +267,10 @@ Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {

bool SessionManager::HasSession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Has][Session]fail for Session manager is not initialized, session_id:%lu.", session_id);
REPORT_INNER_ERROR("E19999",
"HasSession fail for Session manager is not initialized, session_id:%lu.", session_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Has][Session]fail for Session manager is not initialized, session_id:%lu.", session_id);
REPORT_INNER_ERROR("E19999",
"HasSession fail for Session manager is not initialized, session_id:%lu.", session_id);
return false;
}
return session_manager_map_.find(session_id) != session_manager_map_.end();
@@ -289,12 +292,11 @@ Status SessionManager::RegisterCallBackFunc(
SessionId session_id, const std::string &key,
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999", "RegisterCallBackFunc fail for Session manager is not initialized,"
"session_id:%lu, input_key:%s.", session_id, key.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -314,12 +316,11 @@ Status SessionManager::RegisterCallBackFunc(
SessionId session_id, const std::string &key,
const std::function<Status(uint32_t, const std::map<AscendString, ge::Tensor> &)> &callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999",
"RegisterCallBackFunc fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Register][CallBackFunc]fail for Session manager is not initialized, session_id:%lu, input_key:%s.",
session_id, key.c_str());
REPORT_INNER_ERROR("E19999", "RegisterCallBackFunc fail for Session manager is not initialized,"
"session_id:%lu, input_key:%s.", session_id, key.c_str());
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -337,10 +338,10 @@ Status SessionManager::RegisterCallBackFunc(

Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Build][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"BuildGraph fail for Session manager is not initialized, session_id:%lu, graph_id:%u.", session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "[Build][Graph]fail for Session manager is not initialized,"
"session_id:%lu, graph_id:%u.", session_id, graph_id);
REPORT_INNER_ERROR("E19999", "BuildGraph fail for Session manager is not initialized,"
"session_id:%lu, graph_id:%u.", session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -359,12 +360,12 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const
Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id,
const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[AsyncRun][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraphAsync fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[AsyncRun][Graph]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"RunGraphAsync fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -384,10 +385,10 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
std::vector<Tensor> &var_values) {
// step 0: init session manager
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variables]fail for Session manager is not initialized, session_id:%lu", session_id);
REPORT_INNER_ERROR("E19999",
"GetVariables fail for Session manager is not initialized, session_id:%lu", session_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Get][Variables]fail for Session manager is not initialized, session_id:%lu", session_id);
REPORT_INNER_ERROR("E19999",
"GetVariables fail for Session manager is not initialized, session_id:%lu", session_id);
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -453,12 +454,12 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:

bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Check][GraphNeedRebuild]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
GELOGE(GE_SESSION_MANAGER_NOT_INIT,
"[Check][GraphNeedRebuild]fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for Session manager is not initialized, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return true;
}
SessionPtr innerSession = nullptr;
@@ -467,9 +468,9 @@ bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id)
auto it = session_manager_map_.find(session_id);
if (it == session_manager_map_.end()) {
GELOGE(GE_SESSION_NOT_EXIST, "[Find][InnerSession] fail for %lu does not exists", session_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for InnerSession is not exists, session_id:%lu, graph_id:%u.",
session_id, graph_id);
REPORT_INNER_ERROR("E19999",
"IsGraphNeedRebuild fail for InnerSession is not exists, session_id:%lu, graph_id:%u.",
session_id, graph_id);
return true;
} else {
innerSession = it->second;


+ 5
- 5
ge/single_op/single_op_model.cc View File

@@ -169,11 +169,11 @@ Status SingleOpModel::ParseInputNode(const OpDescPtr &op_desc) {
vector<int64_t> offsets = op_desc->GetOutputOffset();
if (offsets.size() != kDataOutputNum) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID,
"[Parse][InputNode]Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
REPORT_INNER_ERROR("E19999",
"ParseInputNode fail for Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
"[Parse][InputNode]Data op should have only one output, but got %zu, op_name:%s, op_type:%s.",
op_desc->GetOutputOffset().size(), op_desc->GetName().c_str(), op_desc->GetType().c_str());
REPORT_INNER_ERROR("E19999", "ParseInputNode fail for Data op should have only one output, but got %zu,"
"op_name:%s, op_type:%s.", op_desc->GetOutputOffset().size(),
op_desc->GetName().c_str(), op_desc->GetType().c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}



+ 4
- 5
ge/single_op/stream_resource.cc View File

@@ -96,7 +96,7 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose,
auto ret = rtMalloc(reinterpret_cast<void **>(&buffer), size, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[RtMalloc][Memory] failed, size = %zu, ret = %d", size, ret);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d, when %s.", size, ret, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d.", size, ret);
return nullptr;
}
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, purpose.c_str(), size)
@@ -104,7 +104,7 @@ uint8_t *StreamResource::DoMallocMemory(const std::string &purpose,
ret = rtMemset(buffer, size, 0U, size);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[RtMemset][Memory] failed, ret = %d", ret);
REPORT_INNER_ERROR("E19999", "rtMemset failed, ret = %d, when %s.", ret, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "rtMemset failed, ret = %d.", ret);
auto rt_ret = rtFree(buffer);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[RtFree][Memory] failed"));
return nullptr;
@@ -132,8 +132,7 @@ uint8_t *StreamResource::MallocWeight(const std::string &purpose, size_t size) {
auto ret = rtMalloc(reinterpret_cast<void **>(&buffer), size, RT_MEMORY_HBM);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[RtMalloc][Memory] failed, size = %zu, ret = %d", size, ret);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d when %s.",
size, ret, __FUNCTION__);
REPORT_INNER_ERROR("E19999", "rtMalloc failed, size = %zu, ret = %d.", size, ret);
return nullptr;
}

@@ -192,7 +191,7 @@ Status StreamResource::BuildOperator(const ModelData &model_data, SingleOp **sin
auto new_op = std::unique_ptr<SingleOp>(new(std::nothrow) SingleOp(this, &stream_mu_, stream_));
if (new_op == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "[New][SingleOp] failed.");
REPORT_INNER_ERROR("E19999", "new SingleOp failed when %s.", __FUNCTION__);
REPORT_CALL_ERROR("E19999", "new SingleOp failed.");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}



Loading…
Cancel
Save