diff --git a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc index c39f25a4..3dca8661 100755 --- a/ge/hybrid/node_executor/aicore/aicore_node_executor.cc +++ b/ge/hybrid/node_executor/aicore/aicore_node_executor.cc @@ -29,8 +29,7 @@ bool IsNoOp(const NodeItem &node_item) { const auto &tensor_desc = node_item.MutableOutputDesc(i); GE_CHECK_NOTNULL(tensor_desc); const auto &shape = tensor_desc->MutableShape(); - if (shape.IsScalar() || shape.GetShapeSize() > 0 || - (node_item.shape_inference_type == DEPEND_SHAPE_RANGE)) { + if (shape.IsScalar() || shape.GetShapeSize() > 0 || (node_item.shape_inference_type == DEPEND_SHAPE_RANGE)) { return false; } } @@ -220,28 +219,28 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End"); } - auto callback = [=, &context]() { - Status callback_ret = SUCCESS; - if (!tasks_.empty()) { - // only last task need update outputs shape - auto task = tasks_.back().get(); - if (task->GetUnknownShapeOpType() == DEPEND_SHAPE_RANGE) { + auto callback = done_callback; + if (!tasks_.empty()) { + // only last task need update outputs shape + auto task = tasks_.back().get(); + if (task->GetUnknownShapeOpType() == DEPEND_SHAPE_RANGE) { + callback = [=, &context]() { + Status callback_ret = SUCCESS; GELOGD("Node[%s] need update outputs shape.", context.GetNodeName()); callback_ret = task->UpdateOutputsShape(context); - } + if (done_callback != nullptr) { + context.SetStatus(callback_ret); + done_callback(); + } + }; } - if (done_callback != nullptr) { - context.SetStatus(callback_ret); - done_callback(); - } - }; - - RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), - "[AiCoreNodeRegisterCallback] Start"); - GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback)); - RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), - "[AiCoreNodeRegisterCallback] End"); + } + if (callback != nullptr) { + RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] Start"); + GE_CHK_STATUS_RET_NOLOG(context.RegisterCallback(callback)); + RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeRegisterCallback] End"); + } GELOGD("[%s] ExecuteAsync End.", context.GetNodeName()); RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeTaskExecuteAsync] End"); return SUCCESS; diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 83cec5f9..b85ad8fe 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -37,6 +37,9 @@ constexpr char const *kAttrOpParamSize = "op_para_size"; constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; const string kAtomicOpType = "DynamicAtomicAddrClean"; std::atomic log_id(0); +const uint32_t kMaxDimNum = 8; +// size,dim1,...,dim8: 9*4=36 +const size_t kShapeBufferSize = sizeof(uint32_t) * (1 + kMaxDimNum); } // namespace TbeHandleHolder::TbeHandleHolder(void *bin_handle) @@ -53,36 +56,27 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr &&holder) { return ret.second; } -Status AiCoreOpTask::Init(const OpDesc &op_desc, - const domi::TaskDef &task_def) { +Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { GE_CHK_STATUS_RET_NOLOG(DoInit(op_desc, task_def)); int32_t unknown_shape_op_type_val = static_cast(DEPEND_IN_SHAPE); - (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, - unknown_shape_op_type_val); - unknown_shape_op_type_ = - static_cast(unknown_shape_op_type_val); - GELOGD("Op [%s] unknown shape type is %d", op_desc.GetName().c_str(), - unknown_shape_op_type_); + (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_op_type_val); + unknown_shape_op_type_ = static_cast(unknown_shape_op_type_val); + GELOGD("Op [%s] unknown shape type is %d", op_desc.GetName().c_str(), unknown_shape_op_type_); if (unknown_shape_op_type_ == DEPEND_SHAPE_RANGE) { - // size,dim1,...,dim8: 9*4=36 - const size_t kDefaultShapeSize = 36; - size_t size = kDefaultShapeSize * op_desc.GetOutputsSize(); + size_t size = kShapeBufferSize * op_desc.GetOutputsSize(); if (size == 0) { - GELOGE(PARAM_INVALID, - "Op [%s] unknown shape type is %d, but outputs size is 0.", - op_desc.GetName().c_str(), unknown_shape_op_type_); + GELOGE(PARAM_INVALID, "Op [%s] unknown shape type is %d, but outputs size is 0.", op_desc.GetName().c_str(), + unknown_shape_op_type_); return PARAM_INVALID; } auto allocator = NpuMemoryAllocator::GetAllocator(); GE_CHECK_NOTNULL(allocator); shape_buffer_ = TensorBuffer::Create(allocator, size); GE_CHECK_NOTNULL(shape_buffer_); - GELOGD("Op [%s] allocate memory for outputs shape success, size=%zu", - op_desc.GetName().c_str(), size); - vector default_value(size, 0); - GE_CHK_RT_RET(rtMemcpy(shape_buffer_->GetData(), shape_buffer_->GetSize(), - default_value.data(), size, - RT_MEMCPY_HOST_TO_DEVICE)); + GELOGD("Op [%s] allocate memory for outputs shape success, size=%zu", op_desc.GetName().c_str(), size); + GE_CHK_RT_RET(rtMemset(shape_buffer_->GetData(), shape_buffer_->GetSize(), 0, size)); + host_shape_buffer_.reset(new (std::nothrow) uint8_t[shape_buffer_->GetSize()]); + GE_CHECK_NOTNULL(host_shape_buffer_); } return SUCCESS; } @@ -121,84 +115,68 @@ Status AiCoreOpTask::DoInit(const OpDesc &op_desc, Status AiCoreOpTask::UpdateOutputsShape(TaskContext &context) const { GELOGD("Node[%s] start update outputs shape.", context.GetNodeName()); GE_CHECK_NOTNULL(shape_buffer_); - auto outputs_shape_buffer = - std::unique_ptr(new uint8_t[shape_buffer_->GetSize()]); - GE_CHK_RT_RET(rtMemcpy(outputs_shape_buffer.get(), shape_buffer_->GetSize(), - shape_buffer_->GetData(), shape_buffer_->GetSize(), - RT_MEMCPY_DEVICE_TO_HOST)); + GE_CHECK_NOTNULL(host_shape_buffer_); + GE_CHK_RT_RET(rtMemcpy(host_shape_buffer_.get(), shape_buffer_->GetSize(), shape_buffer_->GetData(), + shape_buffer_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST)); int num_outputs = context.NumOutputs(); - auto outputs_shape = - reinterpret_cast(outputs_shape_buffer.get()); + auto outputs_shape = reinterpret_cast(host_shape_buffer_.get()); for (int i = 0; i < num_outputs; ++i) { if (outputs_shape[i][0] != 0) { uint32_t dim_num = outputs_shape[i][0]; - const uint32_t kMaxDimNum = 8; GE_CHECK_LE(dim_num, kMaxDimNum); vector dims; - for (uint32_t j = 0; j < dim_num; ++j) { + for (uint32_t j = 1; j <= dim_num; ++j) { dims.emplace_back(static_cast(outputs_shape[i][j])); } auto shape_new = GeShape(dims); - GELOGD("Node[%s] output[%d] shape:%s.", context.GetNodeName(), i, - ToString(dims).c_str()); + GELOGD("Node[%s] output[%d] shape:%s.", context.GetNodeName(), i, ToString(dims).c_str()); GE_CHK_STATUS_RET_NOLOG(UpdateShapeToOutputDesc(context, shape_new, i)); } } return SUCCESS; } -Status AiCoreOpTask::UpdateShapeToOutputDesc(TaskContext &context, - const GeShape &shape, - const int output_index) const { +Status AiCoreOpTask::UpdateShapeToOutputDesc(TaskContext &context, const GeShape &shape, const int output_index) const { auto output_desc = context.MutableOutputDesc(output_index); GE_CHECK_NOTNULL(output_desc); auto shape_old = output_desc->GetShape(); auto origin_shape_old = output_desc->GetOriginShape(); - GELOGD( - "Node[%s] try to update output[%d] shape from %s to %s, origin_shape " - "from %s to %s.", - context.GetNodeName(), output_index, shape_old.ToString().c_str(), - shape.ToString().c_str(), origin_shape_old.ToString().c_str(), - shape.ToString().c_str()); auto origin_format = output_desc->GetOriginFormat(); auto format = output_desc->GetFormat(); auto node_state = context.GetNodeState(); GE_CHECK_NOTNULL(node_state); if (origin_format == format) { - GE_CHK_STATUS_RET( - node_state->UpdateOutputShapes(output_index, shape, shape), - "Node[%s] try to update output[%d] shape from %s to %s, origin_shape " - "from %s to %s failed.", - context.GetNodeName(), output_index, shape_old.ToString().c_str(), - shape.ToString().c_str(), origin_shape_old.ToString().c_str(), - shape.ToString().c_str()); + GELOGD( + "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape " + "from [%s] to [%s].", + context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(), + origin_shape_old.ToString().c_str(), shape.ToString().c_str()); + GE_CHK_STATUS_RET(node_state->UpdateOutputShapes(output_index, shape, shape), + "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape " + "from [%s] to [%s] failed.", + context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(), + origin_shape_old.ToString().c_str(), shape.ToString().c_str()); return SUCCESS; } // if format is not same need convert shape std::vector origin_dims_new; auto trans_ret = - formats::TransShape(format, shape.GetDims(), output_desc->GetDataType(), - origin_format, origin_dims_new); - GE_CHK_STATUS_RET( - trans_ret, - "[Trans][Shape] failed for node[%s] output[%d], origin_format[%d] " - "is not same as format[%d], shape=%s.", - context.GetNodeName(), output_index, origin_format, format, - shape.ToString().c_str()); + formats::TransShape(format, shape.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new); + GE_CHK_STATUS_RET(trans_ret, + "[Trans][Shape] failed for node[%s] output[%d], origin_format[%d] " + "is not same as format[%d], shape=[%s].", + context.GetNodeName(), output_index, origin_format, format, shape.ToString().c_str()); auto origin_shape_new = GeShape(origin_dims_new); - GE_CHK_STATUS_RET( - node_state->UpdateOutputShapes(output_index, shape, origin_shape_new), - "Node[%s] try to update output[%d] shape from %s to %s, origin_shape " - "from %s to %s failed.", - context.GetNodeName(), output_index, shape_old.ToString().c_str(), - shape.ToString().c_str(), origin_shape_old.ToString().c_str(), - origin_shape_new.ToString().c_str()); + GE_CHK_STATUS_RET(node_state->UpdateOutputShapes(output_index, shape, origin_shape_new), + "Node[%s] try to update output[%d] shape from [%s] to [%s], origin_shape " + "from [%s] to [%s] failed.", + context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(), + origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); GELOGD( - "Node[%s] update output[%d] shape from %s to %s, origin_shape " - "from %s to %s.", - context.GetNodeName(), output_index, shape_old.ToString().c_str(), - shape.ToString().c_str(), origin_shape_old.ToString().c_str(), - origin_shape_new.ToString().c_str()); + "Node[%s] update output[%d] shape from [%s] to [%s], origin_shape " + "from [%s] to [%s].", + context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(), + origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index 68d1ed55..a436c9c1 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -133,6 +133,7 @@ class AiCoreOpTask { std::string op_type_; UnknowShapeOpType unknown_shape_op_type_ = DEPEND_IN_SHAPE; std::unique_ptr shape_buffer_ = nullptr; + std::unique_ptr host_shape_buffer_ = nullptr; }; class AtomicAddrCleanOpTask : public AiCoreOpTask {