|
|
@@ -15,13 +15,15 @@ |
|
|
|
*/ |
|
|
|
|
|
|
|
#include "hybrid/node_executor/aicore/aicore_op_task.h" |
|
|
|
#include "framework/common/taskdown_common.h" |
|
|
|
|
|
|
|
#include "common/formats/formats.h" |
|
|
|
#include "external/graph/types.h" |
|
|
|
#include "framework/common/debug/log.h" |
|
|
|
#include "framework/common/taskdown_common.h" |
|
|
|
#include "graph/ge_context.h" |
|
|
|
#include "graph/load/model_manager/tbe_handle_store.h" |
|
|
|
#include "hybrid/executor/hybrid_execution_context.h" |
|
|
|
#include "hybrid/node_executor/aicore/aicore_task_builder.h" |
|
|
|
#include "graph/load/model_manager/tbe_handle_store.h" |
|
|
|
#include "external/graph/types.h" |
|
|
|
#include "single_op/task/build_task_utils.h" |
|
|
|
#include "single_op/task/tbe_task_builder.h" |
|
|
|
|
|
|
@@ -51,7 +53,42 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) { |
|
|
|
return ret.second; |
|
|
|
} |
|
|
|
|
|
|
|
// Initializes the task from the op description and its task definition.
//
// Runs the common initialization (DoInit), then reads the op's
// ATTR_NAME_UNKNOWN_SHAPE_TYPE attribute. For DEPEND_SHAPE_RANGE ops a
// zero-filled buffer is allocated through NpuMemoryAllocator so the output
// shapes can be read back later from shape_buffer_.
//
// @param op_desc   description of the op this task executes
// @param task_def  task definition forwarded to DoInit
// @return SUCCESS on success; PARAM_INVALID when a DEPEND_SHAPE_RANGE op has
//         no outputs; otherwise the error produced by DoInit, the allocator
//         checks or the runtime copy.
Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::DoInit(op_desc, task_def));

  // The attribute is optional: when it is absent the DEPEND_IN_SHAPE default
  // is kept, which is why the result of GetInt is deliberately discarded.
  int32_t unknown_shape_op_type_val = static_cast<int32_t>(DEPEND_IN_SHAPE);
  (void)AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_op_type_val);
  unknown_shape_op_type_ = static_cast<UnknowShapeOpType>(unknown_shape_op_type_val);
  GELOGD("Op [%s] unknown shape type is %d", op_desc.GetName().c_str(), unknown_shape_op_type_);

  // Only DEPEND_SHAPE_RANGE ops need the output-shape buffer.
  if (unknown_shape_op_type_ != DEPEND_SHAPE_RANGE) {
    return SUCCESS;
  }

  // size,dim1,...,dim8: 9*4=36
  const size_t kDefaultShapeSize = 36;
  const size_t size = kDefaultShapeSize * op_desc.GetOutputsSize();
  if (size == 0) {
    GELOGE(PARAM_INVALID, "Op [%s] unknown shape type is %d, but outputs size is 0.", op_desc.GetName().c_str(),
           unknown_shape_op_type_);
    return PARAM_INVALID;
  }

  auto allocator = NpuMemoryAllocator::GetAllocator();
  GE_CHECK_NOTNULL(allocator);
  shape_buffer_ = TensorBuffer::Create(allocator, size);
  GE_CHECK_NOTNULL(shape_buffer_);
  GELOGD("Op [%s] allocate memory for outputs shape success, size=%zu", op_desc.GetName().c_str(), size);

  // Zero the buffer up front: UpdateOutputsShape() treats a leading 0 in a
  // record as "no shape reported" for that output.
  std::vector<char> default_value(size, 0);
  GE_CHK_RT_RET(rtMemcpy(shape_buffer_->GetData(), shape_buffer_->GetSize(), default_value.data(), size,
                         RT_MEMCPY_HOST_TO_DEVICE));
  return SUCCESS;
}
|
|
|
|
|
|
|
Status AiCoreOpTask::DoInit(const OpDesc &op_desc, |
|
|
|
const domi::TaskDef &task_def) { |
|
|
|
op_type_ = op_desc.GetType(); |
|
|
|
log_name_ = op_desc.GetName() + "_tvmbin"; |
|
|
|
log_id_ = log_id++; |
|
|
@@ -81,6 +118,89 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) |
|
|
|
return SUCCESS; |
|
|
|
} |
|
|
|
|
|
|
|
// Reads back the output shapes stored in shape_buffer_ and applies them to
// the node's output descriptors.
//
// The buffer is copied to the host with RT_MEMCPY_DEVICE_TO_HOST and parsed
// as one record of (kMaxDimNum + 1) uint32 values per output:
// [dim_num, dim1, ..., dim8]. A record whose dim_num is 0 is skipped and the
// corresponding output keeps its current shape.
// NOTE(review): the record layout is taken from the sizing comment in Init()
// ("size,dim1,...,dim8: 9*4=36"); confirm it matches what the kernel writes.
//
// @param context  task context providing the output count and descriptors
// @return SUCCESS on success; otherwise the error from the null check, the
//         runtime copy, the dim_num range check or UpdateShapeToOutputDesc.
Status AiCoreOpTask::UpdateOutputsShape(TaskContext &context) const {
  GELOGD("Node[%s] start update outputs shape.", context.GetNodeName());
  GE_CHECK_NOTNULL(shape_buffer_);

  auto outputs_shape_buffer = std::unique_ptr<uint8_t[]>(new uint8_t[shape_buffer_->GetSize()]);
  GE_CHK_RT_RET(rtMemcpy(outputs_shape_buffer.get(), shape_buffer_->GetSize(), shape_buffer_->GetData(),
                         shape_buffer_->GetSize(), RT_MEMCPY_DEVICE_TO_HOST));

  const uint32_t kMaxDimNum = 8;
  // One record per output: the dim count followed by up to kMaxDimNum dims.
  const uint32_t kShapeRecordLen = kMaxDimNum + 1;

  const int num_outputs = context.NumOutputs();
  const auto *shape_records = reinterpret_cast<const uint32_t *>(outputs_shape_buffer.get());
  for (int i = 0; i < num_outputs; ++i) {
    // Stride by the fixed record length, not by the number of outputs.
    const uint32_t *record = shape_records + static_cast<size_t>(i) * kShapeRecordLen;
    const uint32_t dim_num = record[0];
    if (dim_num == 0) {
      continue;
    }
    GE_CHECK_LE(dim_num, kMaxDimNum);

    // Slot 0 holds the count, so the dims live in slots 1..dim_num.
    vector<int64_t> dims;
    dims.reserve(dim_num);
    for (uint32_t j = 1; j <= dim_num; ++j) {
      dims.emplace_back(static_cast<int64_t>(record[j]));
    }
    auto shape_new = GeShape(dims);
    GELOGD("Node[%s] output[%d] shape:%s.", context.GetNodeName(), i, ToString(dims).c_str());
    GE_CHK_STATUS_RET_NOLOG(UpdateShapeToOutputDesc(context, shape_new, i));
  }
  return SUCCESS;
}
|
|
|
|
|
|
|
// Applies a new shape to one output of the node.
//
// When the output's format equals its origin format, `shape` is used for both
// the shape and the origin shape. Otherwise the origin shape is derived from
// `shape` with formats::TransShape before the node state is updated.
//
// @param context       task context owning the output descriptor and node state
// @param shape         new shape for the output
// @param output_index  index of the output to update
// @return SUCCESS on success; otherwise the error from the null checks,
//         formats::TransShape or NodeState::UpdateOutputShapes.
Status AiCoreOpTask::UpdateShapeToOutputDesc(TaskContext &context, const GeShape &shape,
                                             const int output_index) const {
  auto output_desc = context.MutableOutputDesc(output_index);
  GE_CHECK_NOTNULL(output_desc);

  // Captured before the update so the log messages can show old -> new.
  auto shape_old = output_desc->GetShape();
  auto origin_shape_old = output_desc->GetOriginShape();
  GELOGD(
      "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
      "from %s to %s.",
      context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
      origin_shape_old.ToString().c_str(), shape.ToString().c_str());

  auto origin_format = output_desc->GetOriginFormat();
  auto format = output_desc->GetFormat();
  auto node_state = context.GetNodeState();
  GE_CHECK_NOTNULL(node_state);

  if (origin_format == format) {
    // Same format: the new shape is also the new origin shape.
    GE_CHK_STATUS_RET(
        node_state->UpdateOutputShapes(output_index, shape, shape),
        "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
        "from %s to %s failed.",
        context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
        origin_shape_old.ToString().c_str(), shape.ToString().c_str());
    return SUCCESS;
  }

  // if format is not same need convert shape
  std::vector<int64_t> origin_dims_new;
  auto trans_ret =
      formats::TransShape(format, shape.GetDims(), output_desc->GetDataType(), origin_format, origin_dims_new);
  GE_CHK_STATUS_RET(
      trans_ret,
      "[Trans][Shape] failed for node[%s] output[%d], origin_format[%d] "
      "is not same as format[%d], shape=%s.",
      context.GetNodeName(), output_index, origin_format, format, shape.ToString().c_str());

  auto origin_shape_new = GeShape(origin_dims_new);
  GE_CHK_STATUS_RET(
      node_state->UpdateOutputShapes(output_index, shape, origin_shape_new),
      "Node[%s] try to update output[%d] shape from %s to %s, origin_shape "
      "from %s to %s failed.",
      context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
      origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str());
  GELOGD(
      "Node[%s] update output[%d] shape from %s to %s, origin_shape "
      "from %s to %s.",
      context.GetNodeName(), output_index, shape_old.ToString().c_str(), shape.ToString().c_str(),
      origin_shape_old.ToString().c_str(), origin_shape_new.ToString().c_str());
  return SUCCESS;
}
|
|
|
|
|
|
|
Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { |
|
|
|
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); |
|
|
|
if (rt_ret != RT_ERROR_NONE) { |
|
|
@@ -429,6 +549,11 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { |
|
|
|
if (tiling_buffer_ != nullptr) { |
|
|
|
++expected_arg_count; |
|
|
|
} |
|
|
|
|
|
|
|
if (shape_buffer_ != nullptr) { |
|
|
|
++expected_arg_count; |
|
|
|
} |
|
|
|
|
|
|
|
if (expected_arg_count > max_arg_count_) { |
|
|
|
GELOGD("Need to reset size of args_ from %u to %zu.", max_arg_count_, expected_arg_count); |
|
|
|
auto length = expected_arg_count * sizeof(uintptr_t) + offset_; |
|
|
@@ -465,6 +590,12 @@ Status AiCoreOpTask::UpdateArgs(TaskContext &task_context) { |
|
|
|
arg_base_[index++] = reinterpret_cast<uintptr_t>(output->GetData()); |
|
|
|
} |
|
|
|
|
|
|
|
if (shape_buffer_ != nullptr) { |
|
|
|
arg_base_[index++] = reinterpret_cast<uintptr_t>(shape_buffer_->GetData()); |
|
|
|
GELOGD("Node:%s add shape buffer addr to args.", |
|
|
|
task_context.GetNodeName()); |
|
|
|
} |
|
|
|
|
|
|
|
int workspace_num = static_cast<int>(task_context.NumWorkspaces()); |
|
|
|
for (int i = 0; i < workspace_num; ++i) { |
|
|
|
const auto workspace = task_context.MutableWorkspace(i); |
|
|
@@ -567,7 +698,7 @@ std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const { |
|
|
|
} |
|
|
|
|
|
|
|
// Initializes an atomic-address-clean task.
//
// Only the common initialization (AiCoreOpTask::DoInit) is run here.
// AiCoreOpTask::Init is intentionally not called: it runs DoInit itself, so
// calling both would initialize the task twice, and it may additionally
// allocate the output-shape buffer that UpdateArgs appends to the kernel
// arguments.
//
// @param op_desc   description of the op this task executes
// @param task_def  task definition forwarded to DoInit
// @return the error from DoInit if it fails, otherwise the result of
//         InitAtomicAddrCleanIndices.
Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
  GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::DoInit(op_desc, task_def));
  return InitAtomicAddrCleanIndices(op_desc);
}
|
|
|
|
|
|
|