From: @changzherui Reviewed-by: @liujunzhu,@lilongfei15 Signed-off-by: @liucunweitags/v1.2.0
@@ -240,7 +240,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
rm -rf ${BASEPATH}/cov | rm -rf ${BASEPATH}/cov | ||||
mkdir ${BASEPATH}/cov | mkdir ${BASEPATH}/cov | ||||
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | ||||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '*/ge/common/*' '*/ge/executor/*' '*/ge/graph/*' '*/ge/host_kernels/*' '/usr/local/*' -o cov/coverage.info | |||||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||||
cd ${BASEPATH}/cov | cd ${BASEPATH}/cov | ||||
genhtml coverage.info | genhtml coverage.info | ||||
fi | fi | ||||
@@ -99,8 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||||
} | } | ||||
int64_t output_size = 0; | int64_t output_size = 0; | ||||
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | ||||
GELOGE(PARAM_INVALID, "Get output size filed"); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed"); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
GELOGD("Get output size in lanch dump op is %ld", output_size); | GELOGD("Get output size in lanch dump op is %ld", output_size); | ||||
output.set_size(output_size); | output.set_size(output_size); | ||||
@@ -126,8 +126,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||||
} | } | ||||
int64_t input_size = 0; | int64_t input_size = 0; | ||||
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | ||||
GELOGE(PARAM_INVALID, "Get output size filed"); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed"); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
GELOGD("Get input size in lanch dump op is %ld", input_size); | GELOGD("Get input size in lanch dump op is %ld", input_size); | ||||
input.set_size(input_size); | input.set_size(input_size); | ||||
@@ -151,31 +151,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
size_t proto_size = op_mapping_info.ByteSizeLong(); | size_t proto_size = op_mapping_info.ByteSizeLong(); | ||||
bool ret = op_mapping_info.SerializeToString(&proto_msg); | bool ret = op_mapping_info.SerializeToString(&proto_msg); | ||||
if (!ret || proto_size == 0) { | if (!ret || proto_size == 0) { | ||||
GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
return RT_FAILED; | |||||
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
return RT_FAILED; | |||||
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); | rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
return RT_FAILED; | |||||
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); | rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
return RT_FAILED; | |||||
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
constexpr int32_t io_addr_num = 2; | constexpr int32_t io_addr_num = 2; | ||||
@@ -193,8 +193,8 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { | |||||
nullptr, // no need smDesc | nullptr, // no need smDesc | ||||
stream_); | stream_); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||||
return rt_ret; | |||||
GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
GELOGI("Kernel launch dump op success"); | GELOGI("Kernel launch dump op success"); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -204,9 +204,15 @@ Status DumpOp::LaunchDumpOp() { | |||||
GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); | ||||
int32_t device_id = 0; | int32_t device_id = 0; | ||||
rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
if (rt_ret != RT_ERROR_NONE || device_id < 0) { | |||||
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||||
return RT_FAILED; | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
if (device_id < 0) { | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"Check device_id failed, device_id = %d, which should be not less than 0.", | |||||
device_id); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
aicpu::dump::OpMappingInfo op_mapping_info; | aicpu::dump::OpMappingInfo op_mapping_info; | ||||
auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; | auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; | ||||
@@ -232,29 +238,31 @@ Status DumpOp::LaunchDumpOp() { | |||||
task.mutable_op()->set_op_name(op_desc_->GetName()); | task.mutable_op()->set_op_name(op_desc_->GetName()); | ||||
task.mutable_op()->set_op_type(op_desc_->GetType()); | task.mutable_op()->set_op_type(op_desc_->GetType()); | ||||
if (dump_properties_.GetDumpMode() == kDumpOutput) { | if (dump_properties_.GetDumpMode() == kDumpOutput) { | ||||
if (DumpOutput(task) != SUCCESS) { | |||||
GELOGE(FAILED, "Dump output failed"); | |||||
return FAILED; | |||||
auto ret = DumpOutput(task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump output failed"); | |||||
return ret; | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpInput) { | if (dump_properties_.GetDumpMode() == kDumpInput) { | ||||
if (DumpInput(task) != SUCCESS) { | |||||
GELOGE(FAILED, "Dump input failed"); | |||||
return FAILED; | |||||
auto ret = DumpInput(task); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Dump input failed"); | |||||
return ret; | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
} | } | ||||
if (dump_properties_.GetDumpMode() == kDumpAll) { | if (dump_properties_.GetDumpMode() == kDumpAll) { | ||||
auto ret = DumpOutput(task); | auto ret = DumpOutput(task); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Dump output failed when in dumping all"); | |||||
return FAILED; | |||||
GELOGE(ret, "Dump output failed when in dumping all"); | |||||
return ret; | |||||
} | } | ||||
ret = DumpInput(task); | ret = DumpInput(task); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(FAILED, "Dump input failed when in dumping all"); | |||||
return FAILED; | |||||
GELOGE(ret, "Dump input failed when in dumping all"); | |||||
return ret; | |||||
} | } | ||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
} | } | ||||
@@ -162,7 +162,7 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu | |||||
Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | ||||
DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported"); | GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported"); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN) | REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN) | ||||
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
auto c0 = GetCubeSizeByDataType(data_type); | auto c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 < 0) { | if (c0 < 0) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
auto c1 = Ceil(c, c0); | auto c1 = Ceil(c, c0); | ||||
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||||
Status TransShapeDhwckToFz3D(const std::vector<int64_t> &src_shape, DataType data_type, | Status TransShapeDhwckToFz3D(const std::vector<int64_t> &src_shape, DataType data_type, | ||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) { | if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto d = src_shape.at(kDhwcnD); | auto d = src_shape.at(kDhwcnD); | ||||
auto h = src_shape.at(kDhwcnH); | auto h = src_shape.at(kDhwcnH); | ||||
@@ -163,14 +163,14 @@ Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::v | |||||
DataType data_type, Format dst_format, | DataType data_type, Format dst_format, | ||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
if (CheckDataTypeSupport(data_type) != SUCCESS) { | if (CheckDataTypeSupport(data_type) != SUCCESS) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) { | if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) { | ||||
return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape); | return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape); | ||||
} | } | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D) | REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D) | ||||
@@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
auto c0 = GetCubeSizeByDataType(data_type); | auto c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 < 0) { | if (c0 < 0) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
auto c1 = Ceil(c, c0); | auto c1 = Ceil(c, c0); | ||||
@@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat | |||||
Status TransShapeDhwncToFz3DTranspose(const std::vector<int64_t> &src_shape, DataType data_type, | Status TransShapeDhwncToFz3DTranspose(const std::vector<int64_t> &src_shape, DataType data_type, | ||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kDhwncDimsNum)) { | if (!CheckShapeValid(src_shape, kDhwncDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto d = src_shape.at(kDhwncD); | auto d = src_shape.at(kDhwncD); | ||||
auto h = src_shape.at(kDhwncH); | auto h = src_shape.at(kDhwncH); | ||||
@@ -164,14 +164,14 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, con | |||||
DataType data_type, Format dst_format, | DataType data_type, Format dst_format, | ||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
if (CheckDataTypeSupport(data_type) != SUCCESS) { | if (CheckDataTypeSupport(data_type) != SUCCESS) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) { | if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) { | ||||
return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape); | return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape); | ||||
} | } | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE) | REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE) | ||||
@@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||||
hw_shape.push_back(DIM_DEFAULT_VALUE); | hw_shape.push_back(DIM_DEFAULT_VALUE); | ||||
hw_shape.push_back(src_shape[kNdDimIndexN]); | hw_shape.push_back(src_shape[kNdDimIndexN]); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
default: | default: | ||||
@@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap | |||||
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | ||||
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -299,11 +299,19 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & | |||||
Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | ||||
Format dst_format, ShapeVector &dst_shape) { | Format dst_format, ShapeVector &dst_shape) { | ||||
if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) { | |||||
GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, data type %s is not supported", | |||||
if (!IsDataTypeSupport(data_type)) { | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||||
"Trans format from %s to %s, src shape %s, data type %s is not supported", | |||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | ||||
ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | |||||
if (!CheckShape(src_format, src_shape)) { | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||||
"Trans format from %s to %s, src shape %s, data type %s is not supported", | |||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||||
ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
ShapeVector hw_shape; | ShapeVector hw_shape; | ||||
return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape); | return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape); | ||||
@@ -334,7 +342,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto | |||||
Format dst_format, ShapeVector &dst_shape) { | Format dst_format, ShapeVector &dst_shape) { | ||||
GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | ||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ) | REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ) | ||||
@@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||||
Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | ||||
auto c0 = GetCubeSizeByDataType(data_type); | auto c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 < 0) { | if (c0 < 0) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
auto c1 = Ceil(c, c0); | auto c1 = Ceil(c, c0); | ||||
@@ -54,15 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ | |||||
dst_shape.push_back(kNiSize); | dst_shape.push_back(kNiSize); | ||||
dst_shape.push_back(c0); | dst_shape.push_back(c0); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto n = src_shape.at(kNchwN); | auto n = src_shape.at(kNchwN); | ||||
@@ -74,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||||
Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto h = src_shape.at(kHwcnH); | auto h = src_shape.at(kHwcnH); | ||||
@@ -87,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector<int64_t> &src_shape, DataType data_t | |||||
Status TransShapeNhwcToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | Status TransShapeNhwcToFz(const std::vector<int64_t> &src_shape, DataType data_type, std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto n = src_shape.at(kNhwcN); | auto n = src_shape.at(kNhwcN); | ||||
@@ -369,7 +370,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r | |||||
Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | ||||
Format dst_format, std::vector<int64_t> &dst_shape) { | Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
if (CheckDataTypeSupport(data_type) != SUCCESS) { | if (CheckDataTypeSupport(data_type) != SUCCESS) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) { | if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) { | ||||
@@ -382,7 +383,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector<i | |||||
return TransShapeNchwToFz(src_shape, data_type, dst_shape); | return TransShapeNchwToFz(src_shape, data_type, dst_shape); | ||||
} | } | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferFractalZ, FORMAT_NCHW, FORMAT_FRACTAL_Z) | REGISTER_FORMAT_TRANSFER(FormatTransferFractalZ, FORMAT_NCHW, FORMAT_FRACTAL_Z) | ||||
@@ -86,8 +86,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||||
hw_shape.push_back(DIM_DEFAULT_VALUE); | hw_shape.push_back(DIM_DEFAULT_VALUE); | ||||
hw_shape.push_back(src_shape[kNdDimIndexN]); | hw_shape.push_back(src_shape[kNdDimIndexN]); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
default: | default: | ||||
@@ -105,8 +106,9 @@ Status TransShapeToFracZz(const ShapeVector &src_shape, DataType data_type, Shap | |||||
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); | ||||
hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -311,11 +313,19 @@ Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult & | |||||
Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, | ||||
Format dst_format, ShapeVector &dst_shape) { | Format dst_format, ShapeVector &dst_shape) { | ||||
if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) { | |||||
GELOGE(PARAM_INVALID, "Not support trans format from %s to %s, src shape %s, data type %s", | |||||
if (!IsDataTypeSupport(data_type)) { | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, | |||||
"Not support trans format from %s to %s, src shape %s, data type %s", | |||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | ||||
ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | |||||
if (!CheckShape(src_format, src_shape)) { | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, | |||||
"Not support trans format from %s to %s, src shape %s, data type %s", | |||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), | |||||
ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
ShapeVector hw_shape; | ShapeVector hw_shape; | ||||
return TransShapeToFracZz(src_shape, data_type, dst_shape, hw_shape); | return TransShapeToFracZz(src_shape, data_type, dst_shape, hw_shape); | ||||
@@ -346,7 +356,7 @@ Status FormatTransferFractalZzND::TransShape(Format src_format, const ShapeVecto | |||||
Format dst_format, ShapeVector &dst_shape) { | Format dst_format, ShapeVector &dst_shape) { | ||||
GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", | ||||
TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferFractalZz, FORMAT_ND, FORMAT_FRACTAL_ZZ) | REGISTER_FORMAT_TRANSFER(FormatTransferFractalZz, FORMAT_ND, FORMAT_FRACTAL_ZZ) | ||||
@@ -161,7 +161,7 @@ Status FormatTransferFracZHwcn::TransFormat(const TransArgs &args, TransResult & | |||||
Status FormatTransferFracZHwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | Status FormatTransferFracZHwcn::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | ||||
Format dst_format, std::vector<int64_t> &dst_shape) { | Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported"); | GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported"); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN) | REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN) | ||||
@@ -160,7 +160,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & | |||||
Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector<int64_t> &src_shape, DataType data_type, | ||||
Format dst_format, std::vector<int64_t> &dst_shape) { | Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported"); | GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported"); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW) | REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW) | ||||
@@ -43,8 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector<in | |||||
dst_shape.push_back(cube_size); | dst_shape.push_back(cube_size); | ||||
dst_shape.push_back(cube_size); | dst_shape.push_back(cube_size); | ||||
if (!CheckShapeValid(dst_shape, kC1hwncoc0DimsNum)) { | if (!CheckShapeValid(dst_shape, kC1hwncoc0DimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -197,12 +198,15 @@ Status FormatTransferHwcnC1hwncoc0::TransShape(Format src_format, const std::vec | |||||
DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) { | if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) { | ||||
if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||||
ShapeToString(src_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape); | return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape); | ||||
} else if (src_format != FORMAT_HWCN) { | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} else { | } else { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
} | } | ||||
@@ -157,7 +157,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult | |||||
Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | ||||
DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported"); | GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported"); | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC) | REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC) | ||||
@@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ | |||||
Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector<int64_t> &dst_shape) { | ||||
auto c0 = GetCubeSizeByDataType(data_type); | auto c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 < 0) { | if (c0 < 0) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
auto chw = c * h * w; | auto chw = c * h * w; | ||||
@@ -59,8 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||||
dst_shape.push_back(c0); | dst_shape.push_back(c0); | ||||
if (!IsShapeValid(dst_shape)) { | if (!IsShapeValid(dst_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -68,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type | |||||
Status TransShapeNchwToFzC04(const std::vector<int64_t> &src_shape, DataType data_type, | Status TransShapeNchwToFzC04(const std::vector<int64_t> &src_shape, DataType data_type, | ||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
auto n = src_shape.at(kNchwN); | auto n = src_shape.at(kNchwN); | ||||
@@ -293,13 +294,13 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult | |||||
Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector<int64_t> &src_shape, | ||||
DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
if (CheckDataTypeSupport(data_type) != SUCCESS) { | if (CheckDataTypeSupport(data_type) != SUCCESS) { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) { | if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) { | ||||
return TransShapeNchwToFzC04(src_shape, data_type, dst_shape); | return TransShapeNchwToFzC04(src_shape, data_type, dst_shape); | ||||
} | } | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04) | REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04) | ||||
@@ -32,12 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
int64_t c0 = GetCubeSizeByDataType(data_type); | int64_t c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 <= 0) { | if (c0 <= 0) { | ||||
GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | if (!CheckShapeValid(src_shape, kNchwDimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||||
ShapeToString(src_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
dst_shape.clear(); | dst_shape.clear(); | ||||
dst_shape.push_back(src_shape.at(kNchwN)); | dst_shape.push_back(src_shape.at(kNchwN)); | ||||
@@ -46,8 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||||
dst_shape.push_back(src_shape.at(kNchwW)); | dst_shape.push_back(src_shape.at(kNchwW)); | ||||
dst_shape.push_back(c0); | dst_shape.push_back(c0); | ||||
if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -193,7 +195,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto | |||||
if (src_format == FORMAT_NCHW) { | if (src_format == FORMAT_NCHW) { | ||||
return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape); | return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape); | ||||
} else { | } else { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
} | } | ||||
@@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||||
std::vector<int64_t> &dst_shape) { | std::vector<int64_t> &dst_shape) { | ||||
int64_t c0 = GetCubeSizeByDataType(data_type); | int64_t c0 = GetCubeSizeByDataType(data_type); | ||||
if (c0 <= 0) { | if (c0 <= 0) { | ||||
GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
dst_shape.clear(); | dst_shape.clear(); | ||||
dst_shape.push_back(src_shape.at(kNhwcN)); | dst_shape.push_back(src_shape.at(kNhwcN)); | ||||
@@ -44,8 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector<int64_t> &src_shape, DataType d | |||||
dst_shape.push_back(src_shape.at(kNhwcW)); | dst_shape.push_back(src_shape.at(kNhwcW)); | ||||
dst_shape.push_back(c0); | dst_shape.push_back(c0); | ||||
if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", | |||||
ShapeToString(dst_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -189,12 +190,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto | |||||
DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | DataType data_type, Format dst_format, std::vector<int64_t> &dst_shape) { | ||||
if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) { | if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) { | ||||
if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { | ||||
GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", | |||||
ShapeToString(src_shape).c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape); | return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape); | ||||
} else if (src_format != FORMAT_NHWC) { | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} else { | } else { | ||||
return UNSUPPORTED; | |||||
return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; | |||||
} | } | ||||
} | } | ||||
@@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vector<int64_t | |||||
std::string error = "Failed to trans shape, do not support transpose from format " + | std::string error = "Failed to trans shape, do not support transpose from format " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | ||||
GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||||
return UNSUPPORTED; | |||||
GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
auto iter = dst_iter->second.find(dst_format); | auto iter = dst_iter->second.find(dst_format); | ||||
if (iter == dst_iter->second.end()) { | if (iter == dst_iter->second.end()) { | ||||
std::string error = "Failed to trans shape, do not support transpose from format " + | std::string error = "Failed to trans shape, do not support transpose from format " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | FmtToStr(TypeUtils::FormatToSerialString(dst_format)); | ||||
GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||||
return UNSUPPORTED; | |||||
GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
perm = iter->second; | perm = iter->second; | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector< | |||||
std::vector<int64_t> perm_arg; | std::vector<int64_t> perm_arg; | ||||
GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg)); | GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg)); | ||||
if (!IsShapeArgValid(src_shape, perm_arg)) { | if (!IsShapeArgValid(src_shape, perm_arg)) { | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; | |||||
} | } | ||||
dst_shape = TransShapeByPerm(src_shape, perm_arg); | dst_shape = TransShapeByPerm(src_shape, perm_arg); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form | |||||
std::string error = "Failed to trans data from format " + | std::string error = "Failed to trans data from format " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + | ||||
FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); | ||||
GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); | |||||
return UNSUPPORTED; | |||||
GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); | |||||
return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; | |||||
} | } | ||||
return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape); | return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape); | ||||
@@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
std::vector<std::string> path_vec; | std::vector<std::string> path_vec; | ||||
SplitPath(path, path_vec); | SplitPath(path, path_vec); | ||||
for (const auto &single_path : path_vec) { | for (const auto &single_path : path_vec) { | ||||
GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID, | |||||
GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, | |||||
"The shared library file path is too long!"); | "The shared library file path is too long!"); | ||||
continue); | continue); | ||||
// load break when number of loaded so reach maximum | // load break when number of loaded so reach maximum | ||||
@@ -125,7 +125,8 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | GE_IF_BOOL_EXEC(error == nullptr, error = ""); | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | ||||
{"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); | {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); | ||||
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", | |||||
GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, | |||||
"Failed to dlopen the shared library path[%s]. Errormessage[%s]!", | |||||
file_path_dlopen.c_str(), error); | file_path_dlopen.c_str(), error); | ||||
continue; | continue; | ||||
} | } | ||||
@@ -138,8 +139,8 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, | ||||
{"mmDlsym", FmtToStr(func_name) + " is skipped since function" + | {"mmDlsym", FmtToStr(func_name) + " is skipped since function" + | ||||
FmtToStr(func_name) + " is not existed!"}); | FmtToStr(func_name) + " is not existed!"}); | ||||
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | |||||
func_name.c_str()); | |||||
GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", | |||||
func_name.c_str(), func_name.c_str()); | |||||
is_valid = false; | is_valid = false; | ||||
break; | break; | ||||
} | } | ||||
@@ -479,8 +479,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c | |||||
Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Parse model content failed!"); | |||||
return status; | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | ||||
@@ -517,8 +517,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||||
} | } | ||||
if (is_assign_model_) { | if (is_assign_model_) { | ||||
GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||||
return GE_EXEC_LOAD_MODEL_REPEATED; | |||||
GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); | |||||
return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED; | |||||
} | } | ||||
if (ReleaseLocalModelData() != SUCCESS) { | if (ReleaseLocalModelData() != SUCCESS) { | ||||
@@ -528,8 +528,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod | |||||
Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); | ||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Parse model content failed!"); | |||||
return status; | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | file_header_ = reinterpret_cast<ModelFileHeader *>(model_data.model_data); | ||||
@@ -609,7 +609,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||||
GeModelPtr cur_model = ge::MakeShared<ge::GeModel>(); | GeModelPtr cur_model = ge::MakeShared<ge::GeModel>(); | ||||
Status ret = LoadModelData(om_load_helper, cur_model, mode_index); | Status ret = LoadModelData(om_load_helper, cur_model, mode_index); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||||
return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED; | |||||
} | } | ||||
if (is_first_model) { | if (is_first_model) { | ||||
@@ -622,22 +622,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { | |||||
ret = LoadWeights(om_load_helper, cur_model, mode_index); | ret = LoadWeights(om_load_helper, cur_model, mode_index); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||||
return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); | ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); | ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; | |||||
} | } | ||||
ret = LoadTask(om_load_helper, cur_model, mode_index); | ret = LoadTask(om_load_helper, cur_model, mode_index); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
return GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; | |||||
} | } | ||||
root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); | root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); | ||||
} | } | ||||
@@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro | |||||
ge::ModelData &model_data) { | ge::ModelData &model_data) { | ||||
std::string real_path = RealPath(model_path); | std::string real_path = RealPath(model_path); | ||||
if (real_path.empty()) { | if (real_path.empty()) { | ||||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); | |||||
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | ||||
} | } | ||||
@@ -181,7 +181,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||||
if (type != kProfCommandhandleFinalize) { | if (type != kProfCommandhandleFinalize) { | ||||
command.module_index = prof_config_param->profSwitch; | command.module_index = prof_config_param->profSwitch; | ||||
} | } | ||||
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), | |||||
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), | |||||
command.module_index); | command.module_index); | ||||
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | ||||
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | ||||
@@ -192,7 +192,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||||
return ge::FAILED; | return ge::FAILED; | ||||
} | } | ||||
GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index); | |||||
GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
@@ -540,7 +540,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi | |||||
for (auto device_id_module : device_id_module_map_) { | for (auto device_id_module : device_id_module_map_) { | ||||
if (device_id_module.second != 0) { | if (device_id_module.second != 0) { | ||||
uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | uint32_t device_id = static_cast<uint32_t>(device_id_module.first); | ||||
GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); | |||||
GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second); | |||||
rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); | rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(FAILED, "Runtime profiler stop failed."); | GELOGE(FAILED, "Runtime profiler stop failed."); | ||||
@@ -629,7 +629,7 @@ Status ProfilingManager::ProfParseParam(const std::map<std::string, std::string> | |||||
} | } | ||||
if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { | if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast<int32_t>(device_list.size())) { | ||||
GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); | |||||
GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
#endif | #endif | ||||
@@ -659,7 +659,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
for (int32_t i = 0; i < device_num; i++) { | for (int32_t i = 0; i < device_num; i++) { | ||||
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | ||||
} | } | ||||
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||||
GELOGI("Runtime config param: 0x%lx, device num: %d.", module, device_num); | |||||
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
@@ -701,7 +701,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||||
for (int32_t i = 0; i < device_num; i++) { | for (int32_t i = 0; i < device_num; i++) { | ||||
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | ||||
} | } | ||||
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||||
GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num); | |||||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | ||||
@@ -226,7 +226,7 @@ Status GeExecutor::Initialize() { | |||||
} | } | ||||
GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), | GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), | ||||
"Failed to initialize OpsKernelBuilders"); | |||||
"Failed to initialize OpsKernelBuilders."); | |||||
// Start profiling | // Start profiling | ||||
Options profiling_options; | Options profiling_options; | ||||
@@ -670,7 +670,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in | |||||
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, | ||||
bool is_offline) { | bool is_offline) { | ||||
if (!is_offline) { | if (!is_offline) { | ||||
(void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); | |||||
(void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); | |||||
} | } | ||||
if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { | if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { | ||||
@@ -37,6 +37,8 @@ using domi::BuildMode; | |||||
namespace { | namespace { | ||||
const int32_t kInvalidPerfLevel = -1; | const int32_t kInvalidPerfLevel = -1; | ||||
const int64_t kProfilingArStep = 2; | |||||
const int64_t kProfilingArStartLogid = 3; | |||||
enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; | enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; | ||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -457,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||||
if (all_reduce_node_index[i] == node_index) { | if (all_reduce_node_index[i] == node_index) { | ||||
GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); | ||||
(void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); | (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); | ||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
return FAILED); | |||||
int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; | |||||
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
@@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
GELOGW("Vector all_memory_size is empty!"); | GELOGW("Vector all_memory_size is empty!"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { | |||||
GELOGE(FAILED, "dividend is 0!"); | |||||
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { | |||||
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// Memory size is 512 aligned, so it is not necessary to take less than 512 | // Memory size is 512 aligned, so it is not necessary to take less than 512 | ||||
@@ -66,10 +66,7 @@ void AlignMemOffset(size_t &mem_align_size) { | |||||
} | } | ||||
static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | ||||
auto left_node_op_desc = left.node->GetOpDesc(); | |||||
auto right_node_op_desc = right.node->GetOpDesc(); | |||||
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) | |||||
&& (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { | |||||
if (left.GetLifeBegin() < right.GetLifeBegin()) { | |||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
@@ -101,14 +98,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | |||||
auto left_node_op_desc = left.node->GetOpDesc(); | auto left_node_op_desc = left.node->GetOpDesc(); | ||||
auto right_node_op_desc = right.node->GetOpDesc(); | auto right_node_op_desc = right.node->GetOpDesc(); | ||||
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { | if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { | ||||
if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { | |||||
if (left.life_time_end >= static_cast<size_t>(right_node_op_desc->GetId())) { | |||||
if (left.GetLifeBegin() < right.GetLifeBegin()) { | |||||
if (left.life_time_end >= right.GetLifeBegin()) { | |||||
return true; | return true; | ||||
} | } | ||||
} else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { | |||||
} else if (left.GetLifeBegin() == right.GetLifeBegin()) { | |||||
return true; | return true; | ||||
} else { | } else { | ||||
if (right.life_time_end >= static_cast<size_t>(left_node_op_desc->GetId())) { | |||||
if (right.life_time_end >= left.GetLifeBegin()) { | |||||
return true; | return true; | ||||
} | } | ||||
} | } | ||||
@@ -326,12 +323,7 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ | |||||
size_t MemoryBlock::GetLifeBegin() { | size_t MemoryBlock::GetLifeBegin() { | ||||
size_t life_time = 0; | size_t life_time = 0; | ||||
if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
if (node_type_index_list_.front().node != nullptr) { | |||||
auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
life_time = node_op_desc->GetId(); | |||||
} | |||||
} | |||||
life_time = node_type_index_list_.front().GetLifeBegin(); | |||||
} | } | ||||
return life_time; | return life_time; | ||||
} | } | ||||
@@ -418,7 +410,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ | |||||
depend_stream_life_[stream_id_] = GetLifeBegin(); | depend_stream_life_[stream_id_] = GetLifeBegin(); | ||||
} | } | ||||
size_t MemoryBlock::GetLifeEnd() { | |||||
size_t MemoryBlock::GetLifeEnd() const { | |||||
if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
return node_type_index_list_.back().life_time_end; | return node_type_index_list_.back().life_time_end; | ||||
} | } | ||||
@@ -592,32 +584,29 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
for (auto &out_anchor : n->GetAllOutDataAnchors()) { | for (auto &out_anchor : n->GetAllOutDataAnchors()) { | ||||
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | ||||
bool reuse_input = false; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, | |||||
GELOGI("Get reuse_input failed")); | |||||
if (!reuse_input) { | |||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
batch_all_memory_size[batch_label].emplace_back(size); | |||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||||
batch_total_size[batch_label] = size; | |||||
} else { | |||||
batch_total_size[batch_label] += size; | |||||
} | |||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", | |||||
node_op_desc->GetName().c_str(), size); | |||||
return;); | |||||
batch_all_memory_size[batch_label].emplace_back(size); | |||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||||
batch_total_size[batch_label] = size; | |||||
} else { | |||||
batch_total_size[batch_label] += size; | |||||
} | |||||
if (!anchor_to_symbol_.empty()) { | |||||
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); | |||||
if (iter1 == anchor_to_symbol_.end()) { | |||||
continue; | |||||
} | |||||
const std::string &symbol = iter1->second; | |||||
auto iter2 = symbol_size_.find(symbol); | |||||
if (iter2 == symbol_size_.end()) { | |||||
symbol_size_[symbol] = size; | |||||
} else if (size > static_cast<int64_t>(iter2->second)) { | |||||
iter2->second = size; | |||||
} | |||||
if (!anchor_to_symbol_.empty()) { | |||||
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); | |||||
if (iter1 == anchor_to_symbol_.end()) { | |||||
continue; | |||||
} | |||||
const std::string &symbol = iter1->second; | |||||
auto iter2 = symbol_size_.find(symbol); | |||||
if (iter2 == symbol_size_.end()) { | |||||
symbol_size_[symbol] = size; | |||||
} else if (size > static_cast<int64_t>(iter2->second)) { | |||||
iter2->second = size; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -658,35 +647,17 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { | |||||
return false; | return false; | ||||
} | } | ||||
void AddReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) { | |||||
string key = std::to_string(mem_block.Size()); | |||||
key += "_" + std::to_string(mem_block.stream_id_); | |||||
key += "_" + std::to_string(mem_block.memory_type_); | |||||
auto it = reusable_block_counts.find(key); | |||||
if (it != reusable_block_counts.end()) { | |||||
it->second++; | |||||
} else { | |||||
reusable_block_counts[key] = 1; | |||||
} | |||||
} | |||||
void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) { | |||||
string key = std::to_string(mem_block.Size()); | |||||
key += "_" + std::to_string(mem_block.stream_id_); | |||||
key += "_" + std::to_string(mem_block.memory_type_); | |||||
auto it = reusable_block_counts.find(key); | |||||
if (it != reusable_block_counts.end()) { | |||||
if (it->second > 0) { | |||||
it->second--; | |||||
} | |||||
} | |||||
} | |||||
bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block, | |||||
size_t block_size, size_t real_size, bool continuous) { | |||||
bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { | |||||
bool can_reuse = false; | bool can_reuse = false; | ||||
if (reusable_block.Size() == block_size) { | if (reusable_block.Size() == block_size) { | ||||
can_reuse = true; | |||||
// in some continuous input case, continuous first input node's is not same as topo first node. | |||||
if (continuous_life_begin > 0) { | |||||
if (continuous_life_begin > reusable_block.GetLifeEnd()) { | |||||
can_reuse = true; | |||||
} | |||||
} else { | |||||
can_reuse = true; | |||||
} | |||||
} | } | ||||
return can_reuse; | return can_reuse; | ||||
} | } | ||||
@@ -697,6 +668,13 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
return false; | return false; | ||||
} | } | ||||
auto node_desc = n->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); | |||||
return false;); | |||||
std::vector<int64_t> offsets_for_fusion = {}; | |||||
bool has_lx_fusion_attr = | |||||
AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | ||||
auto out_anchor = n->GetOutDataAnchor(out_index); | auto out_anchor = n->GetOutDataAnchor(out_index); | ||||
GE_IF_BOOL_EXEC(out_anchor == nullptr, | GE_IF_BOOL_EXEC(out_anchor == nullptr, | ||||
@@ -719,16 +697,17 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
return false;); | return false;); | ||||
// If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
bool is_input_continuous_no_padding = false; | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, | |||||
is_input_continuous_no_padding); | |||||
if (is_input_continuous_no_padding) { | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); | |||||
if (is_input_continuous) { | |||||
reset_zero_copy_flag = true; | reset_zero_copy_flag = true; | ||||
return false; | |||||
has_lx_fusion_attr = true; | |||||
} else { | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
} | } | ||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), | |||||
// lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later | |||||
GE_IF_BOOL_EXEC(is_input_continuous && | |||||
(CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), | |||||
GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | ||||
no_need_assign_memory = true; | no_need_assign_memory = true; | ||||
return false;); | return false;); | ||||
@@ -742,6 +721,10 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
// Only set attr one times. | // Only set attr one times. | ||||
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { | if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { | ||||
(void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | ||||
// lx fusion case assign max size for first block, so reuse as none continuous | |||||
GE_IF_BOOL_EXEC(has_lx_fusion_attr, | |||||
is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); | |||||
return false;); | |||||
node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); | node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); | ||||
} | } | ||||
peer_input_index = peer_in_anchor->GetIdx(); | peer_input_index = peer_in_anchor->GetIdx(); | ||||
@@ -754,6 +737,95 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
return false; | return false; | ||||
} | } | ||||
bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { | |||||
if (n == nullptr) { | |||||
return false; | |||||
} | |||||
int64_t max_node_life_time = 0; | |||||
int64_t continuous_input_node_life_time = 0; | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | |||||
auto out_anchor = n->GetOutDataAnchor(out_index); | |||||
if(out_anchor == nullptr) { | |||||
return false; | |||||
} | |||||
// continuous input node's life time should be max | |||||
for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ | |||||
return false; | |||||
} | |||||
auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | |||||
GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); | |||||
return false;); | |||||
if(peer_in_node_desc->GetId() > max_node_life_time) { | |||||
max_node_life_time = peer_in_node_desc->GetId(); | |||||
} | |||||
// If GetBool fail, is_input_continuous is false. | |||||
bool is_input_continuous = false; | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); | |||||
if (!is_input_continuous) { | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
} | |||||
if (is_input_continuous) { | |||||
continuous_input_node_life_time = peer_in_node_desc->GetId(); | |||||
} | |||||
} | |||||
} | |||||
return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; | |||||
} | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Check continuous memory reuseable | |||||
/// @return void | |||||
/// | |||||
bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { | |||||
// n,peer_node_desc have been checked | |||||
auto node_desc = n->GetOpDesc(); | |||||
auto peer_node_desc = peer_node->GetOpDesc(); | |||||
continuous_life_begin_ = static_cast<size_t>(node_desc->GetId()); | |||||
// lx fusion case check all continuous input node, firt input node's life time should be min | |||||
for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { | |||||
if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | |||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | |||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | |||||
GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); | |||||
return false; | |||||
} | |||||
auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | |||||
/// | |||||
/// node2 node1 node3 | |||||
/// | / / | | |||||
/// node5 node6 | |||||
/// firt input node's life time is not min | |||||
/// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse | |||||
/// | |||||
if (static_cast<size_t>(peer_out_node_desc->GetId()) < continuous_life_begin_) { | |||||
continuous_life_begin_ = static_cast<size_t>(peer_out_node_desc->GetId()); | |||||
GELOGI( | |||||
"Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," | |||||
"min is node[%s] life[%zu]", | |||||
n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), | |||||
peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); | |||||
} | |||||
// when node3's output node5's life time is not max(node6 > node5), not reuse | |||||
if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), | |||||
in_anchor->GetPeerOutAnchor()->GetIdx())) { | |||||
GELOGI( | |||||
"Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " | |||||
"max life node", | |||||
n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), | |||||
peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
/// @brief Check pre_reuse flag & post_reuse glag for each symbol | /// @brief Check pre_reuse flag & post_reuse glag for each symbol | ||||
@@ -1039,8 +1111,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | ||||
// A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | |||||
if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, | |||||
real_size, no_align_size); | |||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
if (iter != anchor_to_symbol_.end()) { | if (iter != anchor_to_symbol_.end()) { | ||||
@@ -1049,7 +1122,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
reusable_block->ref_count_++; | reusable_block->ref_count_++; | ||||
ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | |||||
reusable_blocks_[memory_type][stream_id].erase((++it).base()); | reusable_blocks_[memory_type][stream_id].erase((++it).base()); | ||||
return reusable_block; | return reusable_block; | ||||
} | } | ||||
@@ -1062,8 +1134,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
// Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); | |||||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | |||||
block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
block->ref_count_++; | block->ref_count_++; | ||||
block->continuous_block_ = continuous; | block->continuous_block_ = continuous; | ||||
@@ -1220,8 +1291,23 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
std::string symbol; | std::string symbol; | ||||
if (IsSymbolExist(node_index_io, symbol)) { | if (IsSymbolExist(node_index_io, symbol)) { | ||||
block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | |||||
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | |||||
return nullptr); | |||||
// reduce old size | |||||
size_t align_size = block->Size(); | |||||
AlignMemOffset(align_size); | |||||
theory_memory_size_ -= align_size; | |||||
auto block_size = GetBlockSize(size, ranges); | |||||
block->SetSize(block_size); | |||||
block->SetLifeTimeEnd(life_time_); | |||||
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | |||||
block->ref_count_++; | block->ref_count_++; | ||||
// add new size | |||||
align_size = block_size; | |||||
AlignMemOffset(align_size); | |||||
theory_memory_size_ += align_size; | |||||
} else { | } else { | ||||
// if ref input is variable, can not find symbol, must judge alone | // if ref input is variable, can not find symbol, must judge alone | ||||
if (IsOutputIndexRef(node_op_desc, index)) { | if (IsOutputIndexRef(node_op_desc, index)) { | ||||
@@ -1281,7 +1367,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | ||||
GELOGI("Get dst_reuse_input_index failed")); | GELOGI("Get dst_reuse_input_index failed")); | ||||
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | ||||
block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); | |||||
out_count_reuse_input += 1; | out_count_reuse_input += 1; | ||||
reuse_input = true; | reuse_input = true; | ||||
} | } | ||||
@@ -1322,7 +1407,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||||
if (static_cast<uint32_t>(index) == output_index) { | if (static_cast<uint32_t>(index) == output_index) { | ||||
if (node->GetOwnerComputeGraph() != nullptr) { | if (node->GetOwnerComputeGraph() != nullptr) { | ||||
string graph_name = node->GetOwnerComputeGraph()->GetName(); | string graph_name = node->GetOwnerComputeGraph()->GetName(); | ||||
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||||
GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||||
op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | ||||
} | } | ||||
return true; | return true; | ||||
@@ -1360,7 +1445,6 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock | |||||
if (to_release->same_stream_) { | if (to_release->same_stream_) { | ||||
to_release->SetLifeTimeEnd(life_time_); | to_release->SetLifeTimeEnd(life_time_); | ||||
reusable_memory.emplace_back(to_release); | reusable_memory.emplace_back(to_release); | ||||
AddReusableBlockCount(*to_release, reusable_block_counts_); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -1460,6 +1544,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
} | } | ||||
is_op_reuse_mem_ = true; | is_op_reuse_mem_ = true; | ||||
continuous_life_begin_ = 0; | |||||
if (op_reuse_env_valid_ == true) { | if (op_reuse_env_valid_ == true) { | ||||
vector<string>::iterator it_name = | vector<string>::iterator it_name = | ||||
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); | std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); | ||||
@@ -1516,7 +1601,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
continue; | continue; | ||||
} | } | ||||
// atomic can't be reused | // atomic can't be reused | ||||
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; | |||||
bool need_change = is_op_reuse_mem_ && is_atomic; | |||||
if (need_change) { | if (need_change) { | ||||
is_op_reuse_mem_ = false; | is_op_reuse_mem_ = false; | ||||
} | } | ||||
@@ -1909,11 +1994,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, | |||||
} | } | ||||
op_desc->SetWorkspace(workspace_list); | op_desc->SetWorkspace(workspace_list); | ||||
} | } | ||||
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " | |||||
"life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), | |||||
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), | |||||
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, | |||||
block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | |||||
GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " | |||||
"noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", | |||||
graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), | |||||
node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, | |||||
block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, | |||||
block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | |||||
block->batch_label_.c_str()); | block->batch_label_.c_str()); | ||||
} | } | ||||
@@ -39,14 +39,15 @@ using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | |||||
enum OpMemoryType { kOutput, kWorkspace }; | enum OpMemoryType { kOutput, kWorkspace }; | ||||
struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} | |||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} | |||||
ge::NodePtr node = nullptr; | ge::NodePtr node = nullptr; | ||||
OpMemoryType mem_type = kOutput; | OpMemoryType mem_type = kOutput; | ||||
uint32_t index = 0; | uint32_t index = 0; | ||||
size_t life_time_end = kMaxLifeTime; | |||||
bool ref_input = false; | bool ref_input = false; | ||||
size_t life_time_begin = 0; | |||||
size_t life_time_end = kMaxLifeTime; | |||||
const string GetMemType() const { | const string GetMemType() const { | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
return "output"; | return "output"; | ||||
@@ -55,6 +56,34 @@ struct NodeTypeIndex { | |||||
} | } | ||||
return "unknown"; | return "unknown"; | ||||
} | } | ||||
size_t GetLifeBegin() const { | |||||
if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { | |||||
return 0; | |||||
} | |||||
if ((life_time_begin > 0) && (life_time_begin < static_cast<size_t>(node->GetOpDesc()->GetId()))) { | |||||
return life_time_begin; | |||||
} else { | |||||
return node->GetOpDesc()->GetId(); | |||||
} | |||||
} | |||||
std::string GetLifeBeginDesc() const { | |||||
if (node == nullptr) { | |||||
return ""; | |||||
} | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
auto life_begin = GetLifeBegin(); | |||||
if (life_begin != static_cast<size_t>(node_op_desc->GetId())) { | |||||
return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); | |||||
} else { | |||||
return std::to_string(node_op_desc->GetId()); | |||||
} | |||||
} | |||||
return ""; | |||||
} | |||||
}; | }; | ||||
class MemoryBlock { | class MemoryBlock { | ||||
@@ -86,16 +115,13 @@ class MemoryBlock { | |||||
symbol_list_.clear(); | symbol_list_.clear(); | ||||
} | } | ||||
void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, | |||||
int64_t stream_id) { | |||||
real_size_list_.emplace_back(real_size); | |||||
no_align_size_list_.emplace_back(no_align_size); | |||||
node_type_index_list_.emplace_back(node, type, out_index, false); | |||||
if (stream_id != stream_id_) { | |||||
same_stream_ = false; | |||||
size_t Size() const { return block_size_; } | |||||
void SetSize(size_t size) { | |||||
if (size > block_size_) { | |||||
block_size_ = size; | |||||
} | } | ||||
} | } | ||||
size_t Size() const { return block_size_; } | |||||
size_t AlignSize() const; | size_t AlignSize() const; | ||||
@@ -143,7 +169,7 @@ class MemoryBlock { | |||||
size_t GetLifeBegin(); | size_t GetLifeBegin(); | ||||
size_t GetLifeEnd(); | |||||
size_t GetLifeEnd() const; | |||||
void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | ||||
@@ -406,6 +432,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); | uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); | ||||
bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
/// @|+++++++++block1++++++++| |+++++++++block1++++++++| | /// @|+++++++++block1++++++++| |+++++++++block1++++++++| | ||||
@@ -429,8 +456,6 @@ class BlockMemAssigner : public MemAssigner { | |||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | ||||
std::map<std::string, uint64_t> reusable_block_counts_; | |||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | ||||
std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_; | std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_; | ||||
@@ -460,6 +485,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
std::string max_batch_label_; | std::string max_batch_label_; | ||||
size_t continuous_life_begin_ = 0; | |||||
/// | /// | ||||
/// @ [stream1][nodeid] | /// @ [stream1][nodeid] | ||||
/// @[nodeid] [stream2][nodeid] | /// @[nodeid] [stream2][nodeid] | ||||
@@ -119,31 +119,15 @@ class GraphMemoryAssigner { | |||||
/// | /// | ||||
ge::Status ReAssignContinuousMemory(bool is_loop_graph); | ge::Status ReAssignContinuousMemory(bool is_loop_graph); | ||||
ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); | |||||
ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); | |||||
ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); | |||||
ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); | |||||
ge::Status ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map, int32_t mem_reuse_model); | |||||
ge::Status GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map, | |||||
int32_t mem_reuse_model, string &max_batch_label); | |||||
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, | |||||
int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); | |||||
ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ||||
ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ||||
map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | ||||
ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
int64_t &continuous_mem_size, int64_t memory_type); | |||||
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); | |||||
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); | |||||
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); | |||||
/// | /// | ||||
/// @brief check the input of node whether support atomic attr | /// @brief check the input of node whether support atomic attr | ||||
@@ -169,10 +153,10 @@ class GraphMemoryAssigner { | |||||
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | ||||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ||||
const std::vector<int64_t> &mem_offset_end); | |||||
const std::vector<int64_t> &mem_offset_end, int64_t memory_type); | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | ||||
const std::vector<int64_t> &atomic_mem_size); | |||||
const std::vector<int64_t> &atomic_mem_size, int64_t memory_type); | |||||
ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ||||
@@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const { | |||||
auto parent_graph_ptr = graph->GetParentGraph(); | |||||
if (parent_graph_ptr == nullptr) { | |||||
return false; | |||||
} | |||||
auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr); | |||||
if (root_graph_ptr == nullptr) { | |||||
return false; | |||||
} | |||||
return root_graph_ptr->GetGraphUnknownFlag(); | |||||
} | |||||
Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, | ||||
vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | vector<domi::TaskDef> &task_def_list, map<uint32_t, string> &op_name_map) { | ||||
GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); | GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); | ||||
@@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
}; | }; | ||||
GE_MAKE_GUARD(release, callback); | GE_MAKE_GUARD(release, callback); | ||||
uint64_t all_reduce_node_idx = 0; | |||||
for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
@@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
// Part2: Call | // Part2: Call | ||||
auto fusion_task_info = | auto fusion_task_info = | ||||
FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, | ||||
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; | |||||
ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; | |||||
GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), | ||||
"Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); | ||||
// continue directly | // continue directly | ||||
@@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
type.c_str()); | type.c_str()); | ||||
// Profiling task | // Profiling task | ||||
size_t task_list_size_before = task_def_list.size(); | size_t task_list_size_before = task_def_list.size(); | ||||
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||||
node_index, task_def_list, all_reduce_node_idx)); | |||||
GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
int64_t op_id = op_desc->GetId(); | int64_t op_id = op_desc->GetId(); | ||||
// Compatible with dynamic shape scenes, the default is 0 | // Compatible with dynamic shape scenes, the default is 0 | ||||
int64_t stream_id = 0; | int64_t stream_id = 0; | ||||
@@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra | |||||
return ret; | return ret; | ||||
} | } | ||||
// Profiling task | // Profiling task | ||||
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||||
node_index, task_def_list, all_reduce_node_idx)); | |||||
GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); | |||||
size_t task_list_size_after = task_def_list.size(); | size_t task_list_size_after = task_def_list.size(); | ||||
// If tasks is reduced | // If tasks is reduced | ||||
if (task_list_size_after < task_list_size_before) { | if (task_list_size_after < task_list_size_before) { | ||||
@@ -382,7 +392,6 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
auto &op_name_map = fusion_task_info.op_name_map; | auto &op_name_map = fusion_task_info.op_name_map; | ||||
auto &profiling_point = fusion_task_info.profiling_point; | auto &profiling_point = fusion_task_info.profiling_point; | ||||
auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; | auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; | ||||
auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; | |||||
// If op_desc have this attr, call nodes with same group key in a stream together | // If op_desc have this attr, call nodes with same group key in a stream together | ||||
if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && | if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && | ||||
(fusion_nodes_seen.count(node.get()) == 0)) { | (fusion_nodes_seen.count(node.get()) == 0)) { | ||||
@@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
// profiling task | // profiling task | ||||
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, | |||||
node_index, task_def_list, all_reduce_idx); | |||||
(void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||||
run_context.stream = run_context.graphStreamList[stream_id]; | run_context.stream = run_context.graphStreamList[stream_id]; | ||||
GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", | GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", | ||||
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); | op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); | ||||
@@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
return ret; | return ret; | ||||
} | } | ||||
// profiling task | // profiling task | ||||
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, | |||||
node_index, task_def_list, all_reduce_idx); | |||||
(void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); | |||||
size_t task_list_size_after = task_def_list.size(); | size_t task_list_size_after = task_def_list.size(); | ||||
// if tasks is reduced | // if tasks is reduced | ||||
if (task_list_size_after < task_list_size_before) { | if (task_list_size_after < task_list_size_before) { | ||||
@@ -526,6 +533,13 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
return GE_GRAPH_GRAPH_NODE_NULL; | return GE_GRAPH_GRAPH_NODE_NULL; | ||||
} | } | ||||
int64_t node_index = 0; | |||||
for (auto &node : all_nodes) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
op_desc->SetId(node_index++); | |||||
} | |||||
map<int64_t, vector<OpDescPtr>> all_stream_ops; | map<int64_t, vector<OpDescPtr>> all_stream_ops; | ||||
for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -673,7 +687,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP | |||||
} | } | ||||
} | } | ||||
if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||||
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { | |||||
profiling_point.end_index.insert(current_idx); | profiling_point.end_index.insert(current_idx); | ||||
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | ||||
op_desc->GetName().c_str(), current_idx); | op_desc->GetName().c_str(), current_idx); | ||||
@@ -842,6 +856,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
GELOGD("Profiling is not open."); | GELOGD("Profiling is not open."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// subgraph of dynamic graph no need to find index, has been found in parent graph | |||||
if (IsSubGraphOfDynamicGraph(graph)) { | |||||
GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
GELOGI("Start get FP/BP index."); | GELOGI("Start get FP/BP index."); | ||||
std::string fp_point_str; | std::string fp_point_str; | ||||
std::string bp_point_str; | std::string bp_point_str; | ||||
@@ -879,9 +900,47 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||||
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||||
bool is_insert_bp_profiling_task) { | |||||
bool is_insert_all_reduce_task = false; | |||||
int64_t ar_log_id = 0xFFFF; | |||||
if (is_insert_bp_profiling_task) { | |||||
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); | |||||
is_insert_all_reduce_task = true; | |||||
} | |||||
if (!is_insert_all_reduce_task) { | |||||
for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||||
if (all_reduce_nodes[i] == node_index) { | |||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
return FAILED); | |||||
ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; | |||||
is_insert_all_reduce_task = true; | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
if (is_insert_all_reduce_task) { | |||||
GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); | |||||
TaskDef ar_task_def; | |||||
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||||
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||||
if (ar_log_def != nullptr) { | |||||
ar_log_def->set_logid(ar_log_id); | |||||
ar_log_def->set_notify(false); | |||||
} | |||||
task_def_list.push_back(ar_task_def); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx) { | |||||
vector<domi::TaskDef> &task_def_list) { | |||||
const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ProfilingManager::Instance().ProfilingTrainingTraceOn(); | ||||
@@ -924,19 +983,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
} | } | ||||
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | ||||
uint64_t all_reduce_task_idx = 0; | |||||
if (is_all_reduce) { | |||||
(void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index, | |||||
task_def_list, is_insert_bp_profiling_task); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||||
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||||
bool is_insert_bp_profiling_task) { | |||||
bool is_insert_all_reduce_task = false; | bool is_insert_all_reduce_task = false; | ||||
if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
all_reduce_task_idx = all_reduce_node_idx; | |||||
int64_t ar_log_id = 0xFFFF; | |||||
if (is_insert_bp_profiling_task) { | |||||
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); | |||||
ar_log_id += 1; | |||||
is_insert_all_reduce_task = true; | is_insert_all_reduce_task = true; | ||||
} | } | ||||
if (is_all_reduce) { | |||||
all_reduce_node_idx++; | |||||
} | |||||
if (!is_insert_all_reduce_task) { | if (!is_insert_all_reduce_task) { | ||||
for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | ||||
if (all_reduce_nodes[i] == node_index) { | if (all_reduce_nodes[i] == node_index) { | ||||
all_reduce_task_idx = i; | |||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), | |||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
return FAILED); | |||||
ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; | |||||
is_insert_all_reduce_task = true; | is_insert_all_reduce_task = true; | ||||
break; | break; | ||||
} | } | ||||
@@ -944,28 +1015,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const | |||||
} | } | ||||
if (is_insert_all_reduce_task) { | if (is_insert_all_reduce_task) { | ||||
GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); | |||||
TaskDef ar_task_def; | TaskDef ar_task_def; | ||||
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | ||||
ar_task_def.set_stream_id(op_desc->GetStreamId()); | ar_task_def.set_stream_id(op_desc->GetStreamId()); | ||||
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | ||||
if (ar_log_def != nullptr) { | if (ar_log_def != nullptr) { | ||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
return FAILED); | |||||
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; | |||||
ar_log_def->set_logid(log_id); | |||||
ar_log_def->set_logid(ar_log_id); | |||||
ar_log_def->set_notify(false); | ar_log_def->set_notify(false); | ||||
(void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | |||||
} | } | ||||
task_def_list.push_back(ar_task_def); | task_def_list.push_back(ar_task_def); | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx) { | |||||
vector<domi::TaskDef> &task_def_list) { | |||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
const char *profiling_mode = std::getenv(kProfilingMode); | const char *profiling_mode = std::getenv(kProfilingMode); | ||||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | ||||
@@ -1010,36 +1077,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P | |||||
task_def_list.emplace_back(end_task_def); | task_def_list.emplace_back(end_task_def); | ||||
} | } | ||||
uint32_t all_reduce_task_idx = 0; | |||||
bool is_insert_all_reduce_task = false; | |||||
if (is_all_reduce && is_insert_bp_profiling_task) { | |||||
all_reduce_task_idx = all_reduce_node_idx; | |||||
is_insert_all_reduce_task = true; | |||||
} | |||||
for (size_t i = 0; i < all_reduce_nodes.size(); i++) { | |||||
if (all_reduce_nodes[i] == node_index) { | |||||
all_reduce_task_idx = i; | |||||
is_insert_all_reduce_task = true; | |||||
break; | |||||
} | |||||
if (is_all_reduce) { | |||||
(void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index, | |||||
task_def_list, is_insert_bp_profiling_task); | |||||
} | } | ||||
if (is_insert_all_reduce_task) { | |||||
GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); | |||||
TaskDef ar_task_def; | |||||
ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); | |||||
ar_task_def.set_stream_id(op_desc->GetStreamId()); | |||||
LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); | |||||
GE_CHECK_NOTNULL(ar_log_def); | |||||
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), | |||||
GELOGE(FAILED, "Multiply result is out of range."); | |||||
return FAILED); | |||||
auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; | |||||
ar_log_def->set_logid(log_id); | |||||
ar_log_def->set_notify(false); | |||||
task_def_list.emplace_back(ar_task_def); | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -129,10 +129,16 @@ class TaskGenerator { | |||||
std::vector<uint32_t> &all_reduce_nodes) const; | std::vector<uint32_t> &all_reduce_nodes) const; | ||||
Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
std::vector<domi::TaskDef> &task_def_list, uint64_t &all_reduce_node_idx); | |||||
std::vector<domi::TaskDef> &task_def_list); | |||||
Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||||
uint32_t node_index, std::vector<domi::TaskDef> &task_def_listy, | |||||
bool is_insert_bp_profiling_task); | |||||
Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, | ||||
std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | std::vector<uint32_t> &all_reduce_nodes, uint32_t node_index, | ||||
std::vector<domi::TaskDef> &task_def_list, uint64_t all_reduce_node_idx); | |||||
std::vector<domi::TaskDef> &task_def_list); | |||||
Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector<uint32_t> &all_reduce_nodes, | |||||
uint32_t node_index, std::vector<domi::TaskDef> &task_def_list, | |||||
bool is_insert_bp_profiling_task); | |||||
static bool IsProfPoint(const OpDescPtr &op, const std::string &name); | static bool IsProfPoint(const OpDescPtr &op, const std::string &name); | ||||
@@ -155,6 +161,8 @@ class TaskGenerator { | |||||
Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); | Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); | ||||
bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const; | |||||
uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
uint64_t var_mem_size_ = 0; | uint64_t var_mem_size_ = 0; | ||||
}; | }; | ||||
@@ -820,6 +820,7 @@ Status DataDumper::UnloadDumpInfo() { | |||||
for (const auto &op_iter : op_list_) { | for (const auto &op_iter : op_list_) { | ||||
aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
task.set_task_id(op_iter.task_id); | task.set_task_id(op_iter.task_id); | ||||
task.set_stream_id(op_iter.stream_id); | |||||
op_mapping_info.mutable_task()->Add(std::move(task)); | op_mapping_info.mutable_task()->Add(std::move(task)); | ||||
} | } | ||||
auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); | ||||
@@ -834,7 +835,6 @@ void DataDumper::DumpShrink() { | |||||
compute_graph_.reset(); | compute_graph_.reset(); | ||||
input_map_.clear(); | input_map_.clear(); | ||||
ref_info_.clear(); | ref_info_.clear(); | ||||
op_list_.clear(); | |||||
} | } | ||||
void DataDumper::PrintCheckLog(string &dump_list_key) { | void DataDumper::PrintCheckLog(string &dump_list_key) { | ||||
@@ -446,20 +446,23 @@ void DavinciModel::InitRuntimeParams() { | |||||
runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | ||||
} | } | ||||
void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { | |||||
const set<string> hcom_opp_types({ | |||||
HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, | |||||
HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE | |||||
}); | |||||
void DavinciModel::CheckHasHcomOp() { | |||||
Graph graph = ge_model_->GetGraph(); | |||||
auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||||
if (compute_graph == nullptr) { | |||||
return; | |||||
} | |||||
for (const auto &node : compute_graph->GetAllNodes()) { | for (const auto &node : compute_graph->GetAllNodes()) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); | ||||
if (hcom_opp_types.count(op_desc->GetType()) > 0) { | |||||
uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||||
hcom_streams_.emplace(stream_id); | |||||
GELOGD("hcom stream: %u.", stream_id); | |||||
} | |||||
GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || | |||||
(op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || | |||||
(op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || | |||||
(op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || | |||||
(op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || | |||||
(op_desc->GetType() == HCOMREDUCE)), | |||||
uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||||
(void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); | |||||
} | } | ||||
} | } | ||||
@@ -621,6 +624,7 @@ void DavinciModel::OpDebugUnRegister() { | |||||
// initialize op sequence and call initialization function of each op respectively | // initialize op sequence and call initialization function of each op respectively | ||||
Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | ||||
// validating params | // validating params | ||||
GELOGI("Priority is %d", priority_); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, | ||||
"Priority must between 0-7, now is %d", priority_); | "Priority must between 0-7, now is %d", priority_); | ||||
GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); | GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); | ||||
@@ -638,7 +642,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
name_ = ge_model_->GetName(); | name_ = ge_model_->GetName(); | ||||
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); | (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); | ||||
GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); | GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); | ||||
CheckHasHcomOp(compute_graph); | |||||
CheckHasHcomOp(); | |||||
vector<int64_t> huge_stream_list; | vector<int64_t> huge_stream_list; | ||||
(void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); | (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); | ||||
@@ -1024,7 +1028,7 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
const vector<OpDescPtr> &output_op_list) { | const vector<OpDescPtr> &output_op_list) { | ||||
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | ||||
for (auto &item : data_by_index) { | for (auto &item : data_by_index) { | ||||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||||
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||||
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | ||||
input_addrs_list_.emplace_back(output_addrs); | input_addrs_list_.emplace_back(output_addrs); | ||||
@@ -1032,18 +1036,14 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | ||||
GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | ||||
GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | ||||
GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); | |||||
if (item.second->GetType() == AIPP_DATA_TYPE) { | if (item.second->GetType() == AIPP_DATA_TYPE) { | ||||
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | ||||
is_dynamic_aipp_ = true; | is_dynamic_aipp_ = true; | ||||
} | } | ||||
} | } | ||||
vector<string> out_node_name; | |||||
(void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); | |||||
GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); | |||||
for (const auto &op_desc : output_op_list) { | for (const auto &op_desc : output_op_list) { | ||||
const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||||
auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||||
GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | ||||
output_addrs_list_.emplace_back(input_addrs); | output_addrs_list_.emplace_back(input_addrs); | ||||
@@ -1061,11 +1061,10 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
if (InitOutputTensorInfo(op_desc) != SUCCESS) { | if (InitOutputTensorInfo(op_desc) != SUCCESS) { | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); | |||||
} | } | ||||
return SUCCESS; | |||||
GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); | |||||
return InitOutputDescInfo(output_op_list); | |||||
} | } | ||||
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | ||||
@@ -1810,16 +1809,16 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp | |||||
/// | /// | ||||
Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | ||||
if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { | if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { | ||||
GELOGW("there is not AIPP related with index %u.", index); | |||||
GELOGW("There is not AIPP related with index %u.", index); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
domi::AippOpParams aipp_params; | domi::AippOpParams aipp_params; | ||||
GeAttrValue::NAMED_ATTRS aipp_attr; | GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||||
"Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | ||||
GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); | op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); | ||||
AippConfigInfo aipp_info; | AippConfigInfo aipp_info; | ||||
@@ -1981,24 +1980,27 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||||
} | } | ||||
} | } | ||||
Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { | |||||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) { | |||||
for (const auto &item : data_by_index) { | |||||
const auto op_desc = item.second; | |||||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
InputOutputDescInfo input; | |||||
ShapeDescription dims_info; | |||||
Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||||
InputOutputDescInfo input; | |||||
ShapeDescription dims_info; | |||||
Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||||
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
input.name = op_desc->GetName(); | |||||
int64_t input_size = 0; | |||||
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
input.size = input_size; | |||||
input_formats_.push_back(format); | |||||
input_descs_.push_back(input); | |||||
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
input.name = op_desc->GetName(); | |||||
int64_t input_size = 0; | |||||
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
input.size = input_size; | |||||
input_formats_.push_back(format); | |||||
input_descs_.push_back(input); | |||||
input.shape_info = dims_info; | |||||
input_descs_dims_.push_back(input); | |||||
input.shape_info = dims_info; | |||||
input_descs_dims_.push_back(input); | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2064,31 +2066,37 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||||
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | ||||
} | } | ||||
Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name) { | |||||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||||
for (uint32_t i = 0; i < out_size; ++i) { | |||||
string output_name; | |||||
InputOutputDescInfo output; | |||||
uint32_t format_result; | |||||
CreateOutput(i, op_desc, output, format_result); | |||||
std::vector<std::string> src_name = op_desc->GetSrcName(); | |||||
std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||||
GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, | |||||
"construct output_name failed."); | |||||
// forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||||
if (out_size == out_node_name.size()) { | |||||
// neweast plan, the index will add to name during generate model. | |||||
bool contains_colon = out_node_name[i].find(":") != std::string::npos; | |||||
output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); | |||||
} else { | |||||
output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); | |||||
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) { | |||||
GELOGD("Output node size: %zu", output_op_list.size()); | |||||
vector<string> out_node_name; | |||||
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); | |||||
for (const auto &op_desc : output_op_list) { | |||||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||||
for (uint32_t index = 0; index < out_size; index++) { | |||||
string output_name; | |||||
InputOutputDescInfo output; | |||||
uint32_t format_result; | |||||
CreateOutput(index, op_desc, output, format_result); | |||||
std::vector<std::string> src_name = op_desc->GetSrcName(); | |||||
std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||||
GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, | |||||
"construct output_name failed."); | |||||
// forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||||
if (out_size == out_node_name.size()) { | |||||
// neweast plan, the index will add to name during generate model. | |||||
bool contains_colon = out_node_name[index].find(":") != std::string::npos; | |||||
output_name = | |||||
contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); | |||||
} else { | |||||
output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | |||||
std::to_string(src_index[index]); | |||||
} | |||||
output.name = output_name; | |||||
output_descs_.push_back(output); | |||||
output_formats_.push_back(format_result); | |||||
} | } | ||||
output.name = output_name; | |||||
output_descs_.push_back(output); | |||||
output_formats_.push_back(format_result); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2470,7 +2478,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
uint64_t buffer_length = buffer.length; | uint64_t buffer_length = buffer.length; | ||||
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | ||||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||||
GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||||
runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | ||||
GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | ||||
idx++; | idx++; | ||||
@@ -3959,8 +3967,11 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||||
} | } | ||||
data_dumper_.SetDeviceId(device_id); | data_dumper_.SetDeviceId(device_id); | ||||
// set loop count addr | |||||
auto get_var_addr = [&](const string &name) -> void *{ | |||||
if (known_node_) { | |||||
data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); | |||||
} else { | |||||
// set loop count addr | |||||
auto get_var_addr = [&](const string &name) -> void *{ | |||||
const auto it = variable_by_name.find(name); | const auto it = variable_by_name.find(name); | ||||
if (it != variable_by_name.end()) { | if (it != variable_by_name.end()) { | ||||
const auto output_sizes = ModelUtils::GetOutputSize(it->second); | const auto output_sizes = ModelUtils::GetOutputSize(it->second); | ||||
@@ -3973,10 +3984,10 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<str | |||||
GELOGD("op: %s is null.", name.c_str()); | GELOGD("op: %s is null.", name.c_str()); | ||||
return nullptr; | return nullptr; | ||||
}; | }; | ||||
data_dumper_.SetLoopAddr(get_var_addr(NODE_NAME_GLOBAL_STEP), | data_dumper_.SetLoopAddr(get_var_addr(NODE_NAME_GLOBAL_STEP), | ||||
get_var_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), | get_var_addr(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), | ||||
get_var_addr(NODE_NAME_FLOWCTRL_LOOP_COND)); | get_var_addr(NODE_NAME_FLOWCTRL_LOOP_COND)); | ||||
} | |||||
} | } | ||||
uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | ||||
@@ -470,6 +470,10 @@ class DavinciModel { | |||||
data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args); | ||||
} | } | ||||
void SetKnownShapeGlobalStep(void *global_step) { | |||||
known_shape_global_step_ = global_step; | |||||
} | |||||
void DumperShrink() { | void DumperShrink() { | ||||
data_dumper_.DumpShrink(); | data_dumper_.DumpShrink(); | ||||
} | } | ||||
@@ -827,7 +831,7 @@ class DavinciModel { | |||||
void OpDebugUnRegister(); | void OpDebugUnRegister(); | ||||
void CheckHasHcomOp(const ComputeGraphPtr &graph); | |||||
void CheckHasHcomOp(); | |||||
Status DoTaskSink(); | Status DoTaskSink(); | ||||
@@ -850,8 +854,8 @@ class DavinciModel { | |||||
Status InitOutputTensorInfo(const OpDescPtr &op_desc); | Status InitOutputTensorInfo(const OpDescPtr &op_desc); | ||||
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | ||||
Status InitInputDescInfo(const OpDescPtr &op_desc); | |||||
Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name); | |||||
Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index); | |||||
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list); | |||||
Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | ||||
Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | ||||
@@ -1057,6 +1061,9 @@ class DavinciModel { | |||||
vector<uint32_t> input_formats_; | vector<uint32_t> input_formats_; | ||||
vector<InputOutputDescInfo> output_descs_; | vector<InputOutputDescInfo> output_descs_; | ||||
vector<uint32_t> output_formats_; | vector<uint32_t> output_formats_; | ||||
// known shape node for dump | |||||
void *known_shape_global_step_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -1428,7 +1428,7 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me | |||||
uint8_t *model_data = nullptr; | uint8_t *model_data = nullptr; | ||||
uint32_t model_len = 0; | uint32_t model_len = 0; | ||||
Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "parse model content failed!"); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); | |||||
OmFileLoadHelper om_file_helper; | OmFileLoadHelper om_file_helper; | ||||
ret = om_file_helper.Init(model_data, model_len); | ret = om_file_helper.Init(model_data, model_len); | ||||
@@ -192,7 +192,7 @@ void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { | |||||
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), | ||||
op_desc->GetName())) { | op_desc->GetName())) { | ||||
dump_flag_ = RT_KERNEL_DUMPFLAG; | dump_flag_ = RT_KERNEL_DUMPFLAG; | ||||
dump_args_ = input_output_addr_; | |||||
dump_args_ = addr; | |||||
} | } | ||||
} | } | ||||
@@ -100,14 +100,14 @@ Status CachingAllocator::Initialize(uint32_t device_id) { | |||||
} | } | ||||
auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator); | auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator); | ||||
if (bin_ptr == nullptr) { | if (bin_ptr == nullptr) { | ||||
GELOGE(ge::FAILED, "Alloc BlockBin failed."); | |||||
return ge::FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
free_block_bins_[i] = bin_ptr; | free_block_bins_[i] = bin_ptr; | ||||
} | } | ||||
memory_allocator_ = MemManager::Instance(memory_type_); | memory_allocator_ = MemManager::Instance(memory_type_); | ||||
if (memory_allocator_ == nullptr) { | if (memory_allocator_ == nullptr) { | ||||
return ge::FAILED; | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
@@ -730,7 +730,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||||
CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); | ||||
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); | ||||
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); | ||||
GM_RUN_AND_DUMP_PERF("OptimizeBeforeBuildForRts", stages.optimizer.OptimizeGraphBeforeBuildForRts, compute_graph); | |||||
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", | |||||
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, | |||||
compute_graph); | |||||
Status ret = compute_graph->TopologicalSorting(); | Status ret = compute_graph->TopologicalSorting(); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -64,9 +64,10 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, size_t memory_size | |||||
Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) const { | Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) const { | ||||
GELOGI("MemoryAllocator::FreeMemory device_id = %u", device_id); | GELOGI("MemoryAllocator::FreeMemory device_id = %u", device_id); | ||||
if (rtFree(memory_addr) != RT_ERROR_NONE) { | |||||
GELOGE(ge::INTERNAL_ERROR, "MemoryAllocator::MallocMemory device_id = %u", device_id); | |||||
return ge::INTERNAL_ERROR; | |||||
auto rtRet = rtFree(memory_addr); | |||||
if (rtRet != RT_ERROR_NONE) { | |||||
GELOGE(rtRet, "MemoryAllocator::MallocMemory device_id = %u", device_id); | |||||
return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
} | } | ||||
memory_addr = nullptr; | memory_addr = nullptr; | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
@@ -168,31 +169,36 @@ Status MemManager::Initialize(const std::vector<rtMemType_t> &memory_type) { | |||||
memory_allocator_map_[index] = memory_allocator; | memory_allocator_map_[index] = memory_allocator; | ||||
GELOGI("Create MemoryAllocator memory type[%u] success.", index); | GELOGI("Create MemoryAllocator memory type[%u] success.", index); | ||||
} else { | } else { | ||||
GELOGE(ge::INTERNAL_ERROR, "Alloc MemoryAllocator failed."); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); | |||||
} | } | ||||
} else { | } else { | ||||
memory_allocator = it->second; | memory_allocator = it->second; | ||||
} | } | ||||
if (memory_allocator == nullptr) { | if (memory_allocator == nullptr) { | ||||
GELOGE(ge::INTERNAL_ERROR, "Create MemoryAllocator failed."); | |||||
return ge::INTERNAL_ERROR; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} else { | } else { | ||||
memory_allocator->Initialize(0); | memory_allocator->Initialize(0); | ||||
} | } | ||||
} | } | ||||
if (InitAllocator(memory_type, caching_allocator_map_) != SUCCESS) { | |||||
GELOGE(ge::INTERNAL_ERROR, "Create CachingAllocator failed."); | |||||
return ge::INTERNAL_ERROR; | |||||
auto ret = InitAllocator(memory_type, caching_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create CachingAllocator failed."); | |||||
return ret; | |||||
} | } | ||||
if (InitAllocator(memory_type, rdma_allocator_map_) != SUCCESS) { | |||||
GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); | |||||
return ge::INTERNAL_ERROR; | |||||
ret = InitAllocator(memory_type, rdma_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create RdmaAllocator failed."); | |||||
return ret; | |||||
} | } | ||||
if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { | |||||
GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); | |||||
return ge::INTERNAL_ERROR; | |||||
ret = InitAllocator(memory_type, host_allocator_map_); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "Create HostMemAllocator failed."); | |||||
return ret; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -229,7 +235,7 @@ MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { | |||||
// Usually impossible | // Usually impossible | ||||
if (memory_allocator == nullptr) { | if (memory_allocator == nullptr) { | ||||
GELOGE(ge::INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); | |||||
static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); | static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); | ||||
return &default_memory_allocator; | return &default_memory_allocator; | ||||
} | } | ||||
@@ -192,18 +192,18 @@ class MemManager { | |||||
allocate_map[index] = allocator; | allocate_map[index] = allocator; | ||||
GELOGI("Create Allocator memory type[%u] success.", index); | GELOGI("Create Allocator memory type[%u] success.", index); | ||||
} else { | } else { | ||||
GELOGE(INTERNAL_ERROR, "Alloc Allocator failed."); | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); | |||||
} | } | ||||
} else { | } else { | ||||
allocator = it->second; | allocator = it->second; | ||||
} | } | ||||
if (allocator == nullptr) { | if (allocator == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "Create Allocator failed."); | |||||
return INTERNAL_ERROR; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} else { | } else { | ||||
if (allocator->Initialize() != SUCCESS) { | if (allocator->Initialize() != SUCCESS) { | ||||
return INTERNAL_ERROR; | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -51,7 +51,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) | |||||
Status RdmaPoolAllocator::Initialize() { | Status RdmaPoolAllocator::Initialize() { | ||||
memory_allocator_ = MemManager::Instance(memory_type_); | memory_allocator_ = MemManager::Instance(memory_type_); | ||||
if (memory_allocator_ == nullptr) { | if (memory_allocator_ == nullptr) { | ||||
return ge::FAILED; | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
@@ -51,6 +51,13 @@ using ClusterPtr = std::shared_ptr<Cluster>; | |||||
static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { | static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { | ||||
for (const auto &node : root_graph->GetAllNodes()) { | for (const auto &node : root_graph->GetAllNodes()) { | ||||
GE_CHECK_NOTNULL(node->GetOpDesc()); | GE_CHECK_NOTNULL(node->GetOpDesc()); | ||||
// not do partition in single op scene. | |||||
bool is_singleop = false; | |||||
(void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_singleop); | |||||
if (is_singleop) { | |||||
return false; | |||||
} | |||||
for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { | for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { | ||||
auto type = input_desc.GetDataType(); | auto type = input_desc.GetDataType(); | ||||
if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { | if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { | ||||
@@ -26,9 +26,6 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace { | namespace { | ||||
std::set<std::string> un_compute_attrs = { | |||||
{ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, | |||||
}; | |||||
std::string GetCseKey(const NodePtr &node) { | std::string GetCseKey(const NodePtr &node) { | ||||
std::stringstream ss; | std::stringstream ss; | ||||
@@ -53,7 +50,7 @@ std::string GetCseKey(const NodePtr &node) { | |||||
ss << name << "-"; | ss << name << "-"; | ||||
} | } | ||||
ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); | |||||
ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); | |||||
return ss.str(); | return ss.str(); | ||||
} | } | ||||
@@ -58,9 +58,9 @@ Status DynamicSingleOpResetShapePass::Run(ComputeGraphPtr graph) { | |||||
continue; | continue; | ||||
} | } | ||||
// pass node without attr: ATTR_DYNAMIC_SHAPE_SINGLE_AICPU | |||||
// pass node without attr: ATTR_SINGLE_OP_SCENE | |||||
bool single_aicpu_unknown = false; | bool single_aicpu_unknown = false; | ||||
if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, single_aicpu_unknown) || | |||||
if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, single_aicpu_unknown) || | |||||
!single_aicpu_unknown) { | !single_aicpu_unknown) { | ||||
continue; | continue; | ||||
} | } | ||||
@@ -1,811 +0,0 @@ | |||||
/** | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#include "graph/passes/variable_op_pass.h" | |||||
#include <string> | |||||
#include <vector> | |||||
#include "common/formats/formats.h" | |||||
#include "common/formats/utils/formats_trans_utils.h" | |||||
#include "graph/ge_context.h" | |||||
#include "graph/graph.h" | |||||
#include "graph/manager/graph_var_manager.h" | |||||
#include "graph/utils/graph_utils.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
#include "graph/utils/type_utils.h" | |||||
namespace ge { | |||||
namespace { | |||||
const int kTransOpOutIndex = 0; | |||||
// Rewire the graph so back_node bypasses front_node: back_node inherits
// front_node's incoming control edges and takes front_node's data input
// (if any) as its own input.
Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) {
  GE_CHECK_NOTNULL(front_node);
  GE_CHECK_NOTNULL(back_node);
  GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str());
  if (GraphUtils::CopyInCtrlEdges(front_node, back_node) != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to move control edges from trans node %s to var-ref %s",
           front_node->GetName().c_str(), back_node->GetName().c_str());
    return INTERNAL_ERROR;
  }
  auto ref_in_anchor = back_node->GetInDataAnchor(0);
  if (ref_in_anchor == nullptr) {
    GELOGE(INTERNAL_ERROR, "The back node %s does not have an input anchor", back_node->GetName().c_str());
    return INTERNAL_ERROR;
  }
  ref_in_anchor->UnlinkAll();
  auto front_in_anchor = front_node->GetInDataAnchor(0);
  if (front_in_anchor == nullptr) {
    GELOGE(INTERNAL_ERROR, "Failed to get the in data anchor from trans node %s type %s",
           front_node->GetName().c_str(), front_node->GetType().c_str());
    return INTERNAL_ERROR;
  }
  auto upstream_out_anchor = front_in_anchor->GetPeerOutAnchor();
  if (upstream_out_anchor == nullptr) {
    // A trans node with no data input leaves the ref node input-less; this is
    // tolerated, only warned about.
    GELOGW("The trans node %s does not have an input, so the ref node %s does not have any inputs after bypass",
           front_node->GetName().c_str(), front_node->GetName().c_str());
    return SUCCESS;
  }
  if (GraphUtils::AddEdge(upstream_out_anchor, ref_in_anchor) != GRAPH_SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to add edge between ref node %s and the prev node of trans node %s",
           back_node->GetName().c_str(), front_node->GetName().c_str());
    return INTERNAL_ERROR;
  }
  return SUCCESS;
}
bool IsTransSupport(const TransNodeInfo &trans_info) { | |||||
if (trans_info.output.GetShape().IsUnknownShape()) { | |||||
return false; | |||||
} | |||||
if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { | |||||
return true; | |||||
} else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { | |||||
formats::TransArgs args{nullptr, | |||||
trans_info.input.GetFormat(), | |||||
trans_info.output.GetFormat(), | |||||
trans_info.input.GetShape().GetDims(), | |||||
trans_info.output.GetShape().GetDims(), | |||||
trans_info.input.GetDataType()}; | |||||
return formats::IsTransFormatSupport(args); | |||||
} else if (trans_info.node_type == CAST) { | |||||
formats::CastArgs datatype_args{nullptr, static_cast<size_t>(trans_info.input.GetShape().GetShapeSize()), | |||||
trans_info.input.GetDataType(), trans_info.output.GetDataType()}; | |||||
return formats::IsTransDataTypeSupport(datatype_args); | |||||
} else { | |||||
return false; | |||||
} | |||||
} | |||||
// Build a compact key describing how trans_node changes its tensor description
// between its data input and its output. Each of format / data type / shape
// contributes either the concrete from-to values or "*" when unchanged.
// When `reverse` is true the comparison direction is flipped.
std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) {
  int data_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType());
  auto op_desc = trans_node->GetOpDesc();
  GeTensorDesc in_desc = op_desc->GetInputDesc(data_index);
  GeTensorDesc out_desc = op_desc->GetOutputDesc(kTransOpOutIndex);
  if (reverse) {
    GeTensorDesc desc_backup = in_desc;
    in_desc = out_desc;
    out_desc = desc_backup;
  }
  auto in_shape = in_desc.GetShape();
  auto out_shape = out_desc.GetShape();
  std::stringstream key;
  if (in_desc.GetFormat() != out_desc.GetFormat()) {
    key << static_cast<int>(in_desc.GetFormat()) << '-' << static_cast<int>(out_desc.GetFormat()) << '-';
  } else {
    key << "*-";
  }
  if (in_desc.GetDataType() != out_desc.GetDataType()) {
    key << static_cast<int>(in_desc.GetDataType()) << '-' << static_cast<int>(out_desc.GetDataType()) << '-';
  } else {
    key << "*-";
  }
  if (ge::formats::IsShapeEqual(in_shape, out_shape)) {
    key << "*";
  } else {
    for (auto dim : in_shape.GetDims()) {
      key << dim << '-';
    }
    for (auto dim : out_shape.GetDims()) {
      key << dim << '-';
    }
  }
  return key.str();
}
} // namespace | |||||
// Entry point of the variable op pass. For every variable in the graph (keyed
// by var_and_var_ref_map_) it tries to fold the trans ops that follow the
// variable into the variable itself: the resulting transformation chain (the
// "fusion road") is registered in the VarManager so the variable data can be
// converted, and the surrounding graph metadata is updated accordingly.
Status VariableOpPass::Run(ge::ComputeGraphPtr graph) {
  if (graph == nullptr) {
    GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph");
    return INTERNAL_ERROR;
  }
  GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(),
         GetContext().SessionId(), graph->GetGraphID());
  if (var_accelerate_ctrl_ == nullptr) {
    GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null");
    return INTERNAL_ERROR;
  }
  GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str());
  // Refresh variable descriptors before matching trans ops against them.
  if (RenewVarDesc(graph) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph");
    return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
  }
  // Populate var_and_var_ref_map_ (variable -> its variable-ref nodes).
  if (GenerateVariableVariableRefMap(graph) != SUCCESS) {
    GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str());
    return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
  }
  GELOGD("Begin to fusion variables and trans nodes");
  for (auto &var_to_refs : var_and_var_ref_map_) {
    auto &node = var_to_refs.first;
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(var_accelerate_ctrl_);
    // Skip variables whose format is not permitted to change.
    if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) {
      GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str());
      continue;
    }
    // Compute the chain of transformations that can be folded into this var.
    VarTransRoad fusion_road;
    auto ret = FusionIfNeed(node, fusion_road);
    if (ret != SUCCESS) {
      return ret;
    }
    if (fusion_road.empty()) {
      GELOGD("No need to fusion variable %s because it's fusion road is empty", node->GetName().c_str());
      continue;
    }
    // Chain the road's tensor descriptions onto the variable's output desc.
    ret = RenewTransRoadDesc(node, fusion_road);
    if (ret != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str());
      return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
    }
    auto start_iter = fusion_road.begin();
    auto end_iter = fusion_road.rbegin();
    GELOGD(
        "Trans variable data for %s from format %s to %s, shape %s to %s "
        "data-type %s to %s, path len %zu success",
        node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(),
        TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(),
        formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(),
        formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(),
        TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(),
        TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size());
    // Persist the road and the graph it was changed in, so the variable's data
    // can be transformed accordingly by the VarManager.
    ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road);
    if (ret != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID());
    if (ret != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str());
      return INTERNAL_ERROR;
    }
    var_accelerate_ctrl_->SetVarChanged(node->GetName());
    GELOGD("Begin to update format info for var %s.", node->GetName().c_str());
    // Propagate the final output desc of the road to the var's IO format info.
    std::set<ge::NodePtr> node_set({node});
    if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) {
      return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
    }
    // renew var desc if the trans_road is all reshape or reformat
    ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road);
    if (ret != SUCCESS) {
      GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
// Rewrite the tensor descriptions along the fusion road so each step's input
// equals the previous step's output, seeded from the variable's own output
// description.
//
// Two situations require this (e.g. Var->Cast->TransData with TransData in the
// road, where TransData's recorded input does not equal the var's output):
//   case 1: the step's input dtype equals its output dtype but differs from the
//           var's — both ends of the step are aligned to the var's dtype;
//   case 2: the step's input format differs from both its output format and the
//           var's — the step's input format is aligned to the var's.
Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) {
  auto var_desc = var->GetOpDesc();
  GE_CHECK_NOTNULL(var_desc);
  TransNodeInfo upstream_info;
  upstream_info.node_type = var->GetType();
  upstream_info.output = var_desc->GetOutputDesc(0);
  for (auto &road_step : fusion_road) {
    // Any field the step leaves unchanged is inherited from upstream.
    if (road_step.input.GetFormat() == road_step.output.GetFormat()) {
      road_step.output.SetFormat(upstream_info.output.GetFormat());
    }
    if (road_step.input.GetDataType() == road_step.output.GetDataType()) {
      road_step.output.SetDataType(upstream_info.output.GetDataType());
    }
    if (ge::formats::IsShapeEqual(road_step.input.GetShape(), road_step.output.GetShape())) {
      road_step.output.SetShape(upstream_info.output.GetShape());
    }
    road_step.input = upstream_info.output;
    upstream_info.output = road_step.output;
  }
  return SUCCESS;
}
// Repeatedly check whether the trans ops directly after variable `var` (and the
// matching trans ops in front of its var-refs) can be folded into the variable,
// accumulating the resulting transformation chain into `fusion_road`.
// The loop runs again after each successful DealFusion because removing one
// layer of trans ops may expose another foldable layer.
Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) {
  bool can_fusion = false;
  while (true) {
    // trans-op type -> the trans nodes of that type found after the variable
    map<string, vector<NodePtr>> trans_type_to_trans_ops ;
    // trans-op type -> pair describing the desc change of that type
    // (exact pair semantics defined by CheckIfCouldBeOptimized — see callee)
    map<string, pair<string, bool>> trans_type_to_changed_desc;
    // record the order of trans op in first path
    vector<string> first_path_trans_order;
    // Sets can_fusion; fills the three maps/vectors above.
    auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc,
                                       trans_type_to_trans_ops, can_fusion);
    if (ret != SUCCESS) {
      GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed");
      return ret;
    }
    if (!can_fusion) {
      break;
    }
    // Trans nodes around the var-refs that must be removed with the fusion.
    vector<pair<NodePtr, NodePtr>> delete_var_ref_trans_nodes;
    // May clear can_fusion if the var-ref side does not match.
    ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes);
    if (ret != SUCCESS) {
      GELOGE(FAILED, "get and check trans op of varref failed");
      return ret;
    }
    if (!can_fusion) {
      break;
    }
    // Append the supported trans steps of this round to fusion_road.
    ret = UpdateTransRoad(fusion_road, first_path_trans_order,
                          trans_type_to_changed_desc, trans_type_to_trans_ops);
    if (ret != SUCCESS) {
      GELOGE(FAILED, "Update trans road failed");
      return ret;
    }
    if (fusion_road.empty()) {
      return SUCCESS;
    }
    // Remove the fused trans nodes from the graph, then loop for the next layer.
    ret = DealFusion(var, fusion_road, trans_type_to_changed_desc,
                     trans_type_to_trans_ops, delete_var_ref_trans_nodes);
    if (ret != SUCCESS) {
      return ret;
    }
  }
  return SUCCESS;
}
// Append to `fusion_road` one step per trans-op type (in first-path order) that
// every matching trans node supports offline. Types containing any unsupported
// trans node are removed from `trans_type_to_changed_desc` instead, so the
// caller will not fuse them.
//
// Fixes over the previous version:
//  - the TransNodeInfo construction duplicated in both branches is factored
//    into one helper lambda;
//  - the representative node in the "append" branch now gets the same op-desc
//    null check that the scan loop already performed.
Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector<std::string> &first_path_trans_order,
                                       map<std::string, std::pair<std::string, bool>> &trans_type_to_changed_desc,
                                       map<std::string, vector<NodePtr>> &trans_type_to_trans_ops) {
  // Build a TransNodeInfo from a trans node's data-input and output descs.
  // Returns a non-SUCCESS status when the node's op desc is null.
  auto make_trans_node_info = [](const NodePtr &trans_node, TransNodeInfo &node_info) -> Status {
    auto op_desc = trans_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);
    int data_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType());
    node_info.node_type = trans_node->GetType();
    node_info.input = op_desc->GetInputDesc(data_index);
    node_info.output = op_desc->GetOutputDesc(kTransOpOutIndex);
    return SUCCESS;
  };
  vector<std::string> delete_trans_type;
  for (auto &trans_type : first_path_trans_order) {
    if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) {
      continue;
    }
    // A type is foldable only if every one of its trans nodes is supported.
    bool delete_flag = false;
    for (auto &trans_node : trans_type_to_trans_ops[trans_type]) {
      TransNodeInfo trans_node_info;
      Status ret = make_trans_node_info(trans_node, trans_node_info);
      if (ret != SUCCESS) {
        return ret;
      }
      if (!IsTransSupport(trans_node_info)) {
        delete_flag = true;
        GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str());
        break;
      }
    }
    if (delete_flag) {
      delete_trans_type.push_back(trans_type);
    } else {
      // All nodes of this type are equivalent for the road; use the first one.
      TransNodeInfo trans_node_info;
      Status ret = make_trans_node_info(*trans_type_to_trans_ops[trans_type].begin(), trans_node_info);
      if (ret != SUCCESS) {
        return ret;
      }
      fusion_road.emplace_back(trans_node_info);
    }
  }
  for (auto &trans_type : delete_trans_type) {
    trans_type_to_changed_desc.erase(trans_type);
  }
  return SUCCESS;
}
Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road,
                                  map<std::string, std::pair<std::string, bool>> trans_type_to_changed_desc,
                                  map<std::string, vector<NodePtr>> trans_type_to_trans_ops,
                                  vector<pair<NodePtr, NodePtr>> &delete_trans_nodes) {
  // Perform the actual fusion for |var_node|: remove every trans op collected
  // on the variable side, then remove (or bypass) the symmetric trans ops in
  // front of each var-ref, iterating |delete_trans_nodes| in reverse (see the
  // ordering comment below).  Returns GE_GRAPH_VARIABLE_OP_PASS_FAILED /
  // INTERNAL_ERROR on any graph-mutation failure.
  GE_CHECK_NOTNULL(var_node);
  GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str());
  auto graph = var_node->GetOwnerComputeGraph();
  for (auto &trans_type : trans_type_to_changed_desc) {
    for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) {
      GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(),
             trans_node->GetType().c_str(), var_node->GetName().c_str());
      // Renew the descs of the adjoining trans-op chain before dropping the node.
      if (RenewTransOpDesc(trans_node, true) != SUCCESS) {
        return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
      }
      if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) {
        return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
      }
      if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) {
        return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
      }
    }
  }
  // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}}
  // we should delete {a,b} first , then b->c,then we can delete {b,c}
  // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b}
  for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) {
    auto front_node = iter->first;   // the trans op to remove/bypass
    auto back_node = iter->second;   // its downstream node (var-ref side)
    if (RenewTransOpDesc(front_node, false) != SUCCESS) {
      return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
    }
    if (front_node->GetOutDataNodes().size() > 1) {
      // Other consumers still need this trans op: only detach the edge to back_node.
      GELOGD("The trans node %s type %s connecting with var-ref %s has more"
             " than one output data nodes, unlink the edge between them",
             front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str());
      if (ByPassTransNode(front_node, back_node) != SUCCESS) {
        GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(),
               back_node->GetName().c_str());
        return INTERNAL_ERROR;
      }
    } else {
      // Sole consumer: the trans op can be removed from the graph entirely.
      GELOGD("The trans node %s type %s connecting with %s has only"
             " one output data nodes, isolate and remove it.",
             front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str());
      if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) {
        return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
      }
      if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) {
        return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
      }
    }
  }
  return SUCCESS;
}
Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) {
  // Walk the chain of trans ops adjacent to |node| and rewrite their
  // input/output descs with the desc chosen from |node|:
  //   is_reverse == true  -> propagate node's OUTPUT desc, walking through the
  //                          trans-op data input (upstream);
  //   is_reverse == false -> propagate node's INPUT desc, walking through
  //                          output 0 (downstream).
  // Only the attributes that |node| actually changes (format/dtype/shape) are
  // rewritten on the neighbours.
  int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType());
  auto op_desc = node->GetOpDesc();
  GE_CHECK_NOTNULL(op_desc);
  GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index);
  GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex);
  GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc;
  bool format_changed = false;
  bool shape_changed = false;
  bool dtype_changed = false;
  if (input_desc.GetFormat() != output_desc.GetFormat()) {
    format_changed = true;
  }
  if (input_desc.GetDataType() != output_desc.GetDataType()) {
    dtype_changed = true;
  }
  if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) {
    shape_changed = true;
  }
  auto cur_node = node;
  while (TransOpUtil::IsTransOp(cur_node)) {
    tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType());
    // Next node in the chosen direction; stop as soon as the chain of trans ops ends.
    auto next_node = is_reverse ? NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) :
                     cur_node->GetOutDataNodes().at(kTransOpOutIndex);
    if (!TransOpUtil::IsTransOp(next_node)) {
      break;
    }
    auto prev_desc = next_node->GetOpDesc();
    tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType());
    auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex);
    auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index);
    GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex));
    GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index));
    // Rewrite both sides of the neighbour with the surviving desc attributes.
    if (shape_changed) {
      mutable_input_desc->SetShape(renew_desc.GetShape());
      mutable_output_desc->SetShape(renew_desc.GetShape());
    }
    if (dtype_changed) {
      mutable_input_desc->SetDataType(renew_desc.GetDataType());
      mutable_output_desc->SetDataType(renew_desc.GetDataType());
    }
    if (format_changed) {
      mutable_input_desc->SetFormat(renew_desc.GetFormat());
      mutable_output_desc->SetFormat(renew_desc.GetFormat());
    }
    cur_node = next_node;
  }
  return SUCCESS;
}
Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector<string> &first_path_trans_order,
                                               map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                               map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &flag) {
  // Decide whether the trans ops following |var| qualify it for the
  // variable-acceleration optimization; the verdict is written into |flag|.
  bool is_match = true;
  const auto status = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc,
                                     trans_type_to_trans_ops, is_match);
  if (status != SUCCESS) {
    GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str());
    return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
  }
  flag = is_match;
  if (!is_match) {
    GELOGI("trans nodes after variable do not meet the condition");
  }
  return SUCCESS;
}
Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector<string> &first_path_trans_order,
                                      map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                      map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &is_match) {
  // Gather trans-op info from the first output path of |var|, then DFS every
  // path to keep only the trans types common to all of them.
  GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str());
  if (GetFisrtPathTransInfo(var, first_path_trans_order,
                            trans_type_to_changed_desc, trans_type_to_trans_ops) != SUCCESS) {
    GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str());
    return FAILED;
  }
  if (first_path_trans_order.empty()) {
    // Nothing to fuse at all; the pass can skip this variable.
    GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str());
    is_match = false;
    return SUCCESS;
  }
  GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str());
  VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match);
  return SUCCESS;
}
void VariableOpPass::VariableDFS(const NodePtr &node, map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                 map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &is_match) {
  // Iterative DFS over every data path leaving |node|.  Each path that ends
  // (sink, or a non-trans op) is handed to UpdateTransInfo, which prunes the
  // trans types not confirmed on that path.  is_match is cleared immediately
  // when any direct successor of |node| is a sink or not a trans op.
  std::stack<NodePtr> node_stack;
  std::stack<vector<NodePtr>> path_stack;
  for (auto &out_node : node->GetOutDataNodes()) {
    if (!is_match) {
      break;  // already disqualified by an earlier branch
    }
    if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) {
      is_match = false;
      break;
    }
    node_stack.push(out_node);
    path_stack.emplace(vector<NodePtr>{out_node});
    while (!node_stack.empty() && is_match) {
      auto cur_node = node_stack.top();
      auto cur_path = path_stack.top();  // NOTE: copies the path so each branch keeps its own
      node_stack.pop();
      path_stack.pop();
      if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) {
        // End of a trans-op chain: reconcile collected info against this path.
        UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops);
        continue;
      }
      for (auto &next_node : cur_node->GetOutDataNodes()) {
        node_stack.push(next_node);
        auto next_path = cur_path;
        next_path.push_back(next_node);
        path_stack.emplace(next_path);
      }
    }
  }
}
Status VariableOpPass::UpdateTransInfo(vector<NodePtr> &cur_path, bool& is_match,
                                       map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                       map<string, vector<NodePtr>> &trans_type_to_trans_ops) {
  // Reconcile the candidate trans types against one complete DFS path:
  // confirm each type that appears on this path with the same desc diff as the
  // first path, then erase every type this path failed to confirm — a type
  // must occur on EVERY path to remain a fusion candidate.
  GELOGD("Begin to update trans info by path");
  std::set<string> trans_op_occured;
  for (auto &trans_node : cur_path) {
    auto trans_node_type = trans_node->GetType();
    // Only the first occurrence of each type on this path is considered.
    if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() ||
        !ge::TransOpUtil::IsTransOp(trans_node_type)) {
      continue;
    }
    trans_op_occured.insert(trans_node_type);
    auto desc_diff = GetInAndOutDecsDiff(trans_node);
    if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() &&
        desc_diff == trans_type_to_changed_desc[trans_node_type].first) {
      trans_type_to_changed_desc[trans_node_type].second = true;  // confirmed on this path
      auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(),
                       trans_type_to_trans_ops[trans_node_type].end(),
                       trans_node);
      if (iter == trans_type_to_trans_ops[trans_node_type].end()) {
        trans_type_to_trans_ops[trans_node_type].push_back(trans_node);
      }
    }
  }
  std::set<string> delete_trans_types;
  for (auto &trans_item : trans_type_to_changed_desc) {
    if (!trans_item.second.second) {
      delete_trans_types.insert(trans_item.first);  // not seen on this path -> drop
    } else {
      trans_item.second.second = false;  // reset the confirmation flag for the next path
    }
  }
  for (auto& delete_item : delete_trans_types) {
    trans_type_to_changed_desc.erase(delete_item);
  }
  if (trans_type_to_changed_desc.empty()) {
    is_match = false;  // no common trans type survives; variable is not optimizable
  }
  return SUCCESS;
}
Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector<string> &first_path_trans_order,
                                             map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                             map<string, vector<NodePtr>> &trans_type_to_trans_ops) {
  // Walk straight down the 0-th data output of |var| while the chain consists
  // of trans ops, recording each trans type the first time it is met.
  auto walker = var;
  while (walker->GetOutDataNodesSize() != 0) {
    walker = walker->GetOutDataNodes().at(0);
    GE_CHECK_NOTNULL(walker);
    if (!ge::TransOpUtil::IsTransOp(walker)) {
      break;
    }
    const auto walker_type = walker->GetType();
    // only get the the first occurrence operator of same type
    if (trans_type_to_changed_desc.count(walker_type) == 0) {
      trans_type_to_changed_desc[walker_type] = make_pair(GetInAndOutDecsDiff(walker), false);
      trans_type_to_trans_ops[walker_type] = vector<NodePtr>{walker};
      first_path_trans_order.push_back(walker_type);
    }
  }
  GELOGD("get var %s first path trans info success", var->GetName().c_str());
  return SUCCESS;
}
Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check,
                                                  map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                                  vector<pair<NodePtr, NodePtr>> &delete_var_ref_trans_nodes) {
  // For every var-ref of |var_node|, walk the trans ops feeding it (starting
  // at the ref and moving upstream) and verify each fusion-candidate trans
  // type appears with a symmetric desc diff.  Matching (trans op, downstream
  // node) pairs are collected into |delete_var_ref_trans_nodes| for later
  // deletion.  pass_check is cleared when the ref side does not mirror the
  // var side (asymmetric diff, or some candidate type missing).
  auto iterator = var_and_var_ref_map_.find(var_node);
  if (iterator == var_and_var_ref_map_.end()) {
    GELOGD("there is no var_ref of node %s", var_node->GetName().c_str());
    return SUCCESS;
  }
  // The full list of candidate types each ref must account for.
  vector<string> delete_trans_type;
  for (auto &trans_type : trans_type_to_changed_desc) {
    delete_trans_type.push_back(trans_type.first);
  }
  for (auto &ref_node : iterator->second) {
    GE_CHECK_NOTNULL(ref_node);
    auto cur_node = *ref_node->GetInDataNodes().begin();
    auto behind_node = ref_node;  // cur_node's downstream neighbour during the walk
    GE_CHECK_NOTNULL(cur_node);
    // Per-ref working copy: types get removed as they are matched.
    vector<string> tmp_delete_trans_type = delete_trans_type;
    while (TransOpUtil::IsTransOp(cur_node)) {
      GE_CHECK_NOTNULL(cur_node);
      auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType());
      if (iter != tmp_delete_trans_type.end()) {
        CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first,
                                           pass_check);
        if (!pass_check) {
          GELOGD("trans op : %s of var ref %s is illegal", cur_node->GetName().c_str(), ref_node->GetName().c_str());
          return SUCCESS;
        }
        tmp_delete_trans_type.erase(iter);
        delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node));
      }
      // Step upstream through the trans op's data input.
      int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType());
      behind_node = cur_node;
      cur_node = cur_node->GetInDataNodes().at(tran_in_index);
    }
    if (!tmp_delete_trans_type.empty()) {
      // Some candidate type never appeared in front of this ref.
      pass_check = false;
      return SUCCESS;
    }
  }
  return SUCCESS;
}
Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff,
                                                          bool &is_symmetry) {
  // A var-ref trans op is symmetric when its reversed in/out desc diff equals
  // the diff recorded for the corresponding var-side trans op.
  const auto ref_diff = GetInAndOutDecsDiff(var_ref_trans_op, true);
  is_symmetry = (ref_diff == desc_diff);
  return SUCCESS;
}
Status VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) {
  // Copy |final_output|'s format/dtype/shape onto output 0 of |node| and onto
  // input/output 0 of every var-ref of |node|, (re)building the var->refs map
  // for the graph if this node is not yet in it.
  if (node == nullptr || node->GetOpDesc() == nullptr) {
    GELOGE(FAILED, "node or opdesc is nullptr");
    return FAILED;
  }
  const Format &format = final_output.GetFormat();
  const DataType &data_type = final_output.GetDataType();
  const GeShape &shape = final_output.GetShape();
  GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(),
         TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str());
  auto node_desc = node->GetOpDesc()->GetOutputDesc(0);
  CopyVariableFormatDataTypeAndShape(final_output, node_desc);
  if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) {
    GELOGE(FAILED, "update output desc fail.");
    return FAILED;
  }
  GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.",
         TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(),
         TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(),
         node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str());
  auto iterator = var_and_var_ref_map_.find(node);
  if (iterator == var_and_var_ref_map_.end()) {
    // Map may be stale or unbuilt for this graph; regenerate and retry below.
    auto graph = node->GetOwnerComputeGraph();
    if (GenerateVariableVariableRefMap(graph) != SUCCESS) {
      GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str());
      return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
    }
  }
  iterator = var_and_var_ref_map_.find(node);
  if (iterator == var_and_var_ref_map_.end()) {
    // No refs known for this var; not an error — nothing else to update.
    GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(),
           node->GetOwnerComputeGraph()->GetName().c_str());
    return SUCCESS;
  }
  for (const auto &var_ref_node : iterator->second) {
    auto var_ref_node_description = var_ref_node->GetOpDesc();
    GE_CHECK_NOTNULL(var_ref_node_description);
    GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.",
           TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(),
           shape.GetDims().size(), var_ref_node->GetName().c_str());
    // Best-effort updates: failures are logged but do not abort the pass.
    if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) {
      GELOGW("UpdateOutputDesc fail.");
    }
    if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) {
      GELOGW("UpdateInputDesc fail.");
    }
    const auto &input_desc = var_ref_node_description->MutableInputDesc(0);
    const auto &output_desc = var_ref_node_description->MutableOutputDesc(0);
    GE_CHECK_NOTNULL(input_desc);
    GE_CHECK_NOTNULL(output_desc);
    GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.",
           TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(),
           TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(),
           var_ref_node->GetName().c_str());
  }
  return SUCCESS;
}
Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) {
  GE_CHECK_NOTNULL(compute_graph);
  // First pass: bucket every VARIABLE node as either a real variable or a
  // var-ref (one carrying REF_VAR_SRC_VAR_NAME), keyed by the source var name.
  std::map<std::string, NodePtr> names_to_var;
  std::map<std::string, std::set<NodePtr>> names_to_refs;
  for (auto &node : compute_graph->GetDirectNode()) {
    if (node->GetType() != VARIABLE) {
      continue;
    }
    std::string ref_var_name;
    if (ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) {
      names_to_refs[ref_var_name].insert(node);
    } else {
      names_to_var[node->GetName()] = node;
    }
  }
  // Second pass: join each variable with its refs by name.
  for (auto &name_to_var : names_to_var) {
    var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first];
  }
  return SUCCESS;
}
void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, | |||||
GeTensorDesc &dst_tensor_desc) { | |||||
dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); | |||||
dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); | |||||
dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); | |||||
} | |||||
Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set<NodePtr> &nodes) {
  // Apply the fused output desc to every variable (and its refs) in |nodes|.
  for (auto &target : nodes) {
    if (UpdateVarAndRefOutputFormatInfo(final_output, target) != SUCCESS) {
      return GE_GRAPH_VARIABLE_OP_PASS_FAILED;
    }
  }
  return SUCCESS;
}
Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) {
  GE_CHECK_NOTNULL(graph);
  // Renew the descriptor VarManager keeps for every variable-like node so it
  // matches the (possibly fused/reformatted) op desc now on the graph.
  // Perf/consistency: the session's VarManager is loop-invariant — look the
  // singleton up once instead of twice per node.
  auto var_manager = ge::VarManager::Instance(graph->GetSessionID());
  GE_CHECK_NOTNULL(var_manager);
  for (auto &node : graph->GetDirectNode()) {
    const bool is_var_node =
        (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP);
    if (!is_var_node) {
      continue;
    }
    if (!var_manager->IsVarExist(node->GetName())) {
      // Node is not registered with VarManager; nothing to renew.
      GELOGD("var manager does not exist var node[%s]", node->GetName().c_str());
      continue;
    }
    GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str());
    GE_CHECK_NOTNULL(node->GetOpDesc());
    if (var_manager->RenewCurVarDesc(node->GetName(), node->GetOpDesc()) != SUCCESS) {
      GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str());
      return FAILED;
    }
  }
  return SUCCESS;
}
Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) {
  // renew var desc if the trans_road is all reshape or reformat
  for (const auto &step : fusion_road) {
    if (step.node_type != RESHAPE && step.node_type != REFORMAT) {
      return SUCCESS;
    }
  }
  auto var_manager = ge::VarManager::Instance(session_id);
  if (!var_manager->IsVarExist(node->GetName())) {
    GELOGD("var manager does not exist var node[%s]", node->GetName().c_str());
    return SUCCESS;
  }
  GELOGD("var manager exist var node[%s]", node->GetName().c_str());
  GE_CHECK_NOTNULL(node->GetOpDesc());
  if (var_manager->RenewCurVarDesc(node->GetName(), node->GetOpDesc()) != SUCCESS) {
    GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str());
    return FAILED;
  }
  return SUCCESS;
}
} // namespace ge |
@@ -1,104 +0,0 @@ | |||||
/** | |||||
* Copyright 2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ | |||||
#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ | |||||
#include <map> | |||||
#include <set> | |||||
#include <stack> | |||||
#include "graph/common/transop_util.h" | |||||
#include "common/formats/utils/formats_trans_utils.h" | |||||
#include "graph/utils/node_utils.h" | |||||
#include "graph/graph.h" | |||||
#include "graph/manager/graph_var_manager.h" | |||||
#include "graph/manager/util/variable_accelerate_ctrl.h" | |||||
#include "inc/graph_pass.h" | |||||
namespace ge { | |||||
namespace variable_op {
// Pair of tensor descs captured from a trans op.
struct NodeDesc {
  ge::GeTensorDesc input;
  ge::GeTensorDesc output;
  bool is_update = false;  // presumably marks descs already applied back to the node — confirm at use sites
};
}  // namespace variable_op
// Graph pass that accelerates variables by fusing the trans ops (format /
// dtype / shape conversions) that follow a variable — and mirror it in front
// of its var-refs — into the variable itself.
class VariableOpPass : public GraphPass {
 public:
  explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {}
  ~VariableOpPass() override = default;
  // Entry point: runs the pass over the whole compute graph.
  Status Run(ge::ComputeGraphPtr graph) override;

 private:
  // Builds the fusion road from the first-path trans order, dropping
  // unsupported trans types.
  Status UpdateTransRoad(VarTransRoad &fusion_road, vector<string> &trans_road_order,
                         map<string, pair<string, bool>> &trans_type_to_changed_desc,
                         map<string, vector<NodePtr>> &trans_type_to_trans_ops);
  // Removes/bypasses the collected trans ops around a variable and its refs.
  Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road,
                    map<string, pair<string, bool>> trans_type_to_changed_desc,
                    map<string, vector<NodePtr>> trans_type_to_trans_ops,
                    vector<pair<NodePtr, NodePtr>> &delete_trans_nodes);
  // Renews descs along a trans-op chain; is_reverse selects walk direction.
  Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse);
  Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road);
  // Checks whether the trans ops after |var| qualify it for acceleration.
  Status CheckIfCouldBeOptimized(const NodePtr &var, vector<string> &trans_road_order,
                                 map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                 map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &flag);
  Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road);
  // Finds trans types common to every output path of |var|.
  Status GetSameTransOP(const NodePtr &var, vector<string> &trans_road_order,
                        map<string, pair<string, bool>> &trans_type_to_changed_desc,
                        map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &is_match);
  // Collects trans info along the first (0-th output) path.  [sic: "Fisrt"]
  Status GetFisrtPathTransInfo(const NodePtr &var, vector<string> &trans_road_order,
                               map<string, pair<string, bool>> &trans_type_to_changed_desc,
                               map<string, vector<NodePtr>> &trans_type_to_trans_ops);
  // DFS over all output paths, pruning trans types not present on every path.
  void VariableDFS(const NodePtr &node, map<string, pair<string, bool>> &trans_type_to_changed_desc,
                   map<string, vector<NodePtr>> &trans_type_to_trans_ops, bool &is_match);
  // Reconciles candidate trans types against one complete DFS path.
  Status UpdateTransInfo(vector<NodePtr> &cur_path, bool& is_match,
                         map<string, pair<string, bool>> &trans_type_to_changed_desc,
                         map<string, vector<NodePtr>> &trans_type_to_trans_ops);
  // Verifies each var-ref is preceded by symmetric trans ops; collects them.
  Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check,
                                    map<string, pair<string, bool>> &trans_type_to_changed_desc,
                                    vector<pair<NodePtr, NodePtr>> &delete_var_ref_trans_nodes);
  Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry);
  // Applies the fused output desc to a variable node and all of its refs.
  Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node);
  // Rebuilds var_and_var_ref_map_ from the graph's VARIABLE nodes.
  Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph);
  // Copies shape/format/dtype only.
  void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc);
  Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set<NodePtr> &nodes);
  // Syncs renewed op descs back into the session's VarManager.
  Status RenewVarDesc(ge::ComputeGraphPtr &graph);
  Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road);

  // Maps each variable node to the set of its var-ref nodes.
  map<NodePtr, std::set<NodePtr>> var_and_var_ref_map_;
  // Not owned; controls which variables may be accelerated.
  VarAccelerateCtrl *var_accelerate_ctrl_;
};
} // namespace ge | |||||
#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ |
@@ -1925,7 +1925,7 @@ void GraphPrepare::TypeConversionOfConstant() { | |||||
for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { | for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { | ||||
// This can ensure that n is not a null pointer | // This can ensure that n is not a null pointer | ||||
// No Conversion when called by aclOpCompile | // No Conversion when called by aclOpCompile | ||||
(void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile); | |||||
(void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_acl_compile); | |||||
if (is_acl_compile) { | if (is_acl_compile) { | ||||
return; | return; | ||||
} | } | ||||
@@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std: | |||||
std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | std::unique_ptr<domi::AippOpParams> aipp_params(new (std::nothrow) domi::AippOpParams()); | ||||
ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ge::GeAttrValue::NAMED_ATTRS aipp_attr; | ||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | |||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, | |||||
"Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); | ||||
@@ -221,7 +221,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||||
auto &tensor_desc = input_tensor_desc_[input_index]; | auto &tensor_desc = input_tensor_desc_[input_index]; | ||||
tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); | tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); | ||||
args.input_desc[input_index] = tensor_desc; | args.input_desc[input_index] = tensor_desc; | ||||
GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||||
GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), | ||||
"Failed to calc tensor size, index = %zu, shape = [%s]", | "Failed to calc tensor size, index = %zu, shape = [%s]", | ||||
input_index, | input_index, | ||||
@@ -238,7 +238,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||||
GE_CHECK_NOTNULL(tensor_buffer); | GE_CHECK_NOTNULL(tensor_buffer); | ||||
args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release())); | args.inputs.emplace_back(std::shared_ptr<TensorBuffer>(tensor_buffer.release())); | ||||
GELOGD("To copy input data for input[%u]", input_index); | |||||
GELOGD("To copy input data for input[%zu]", input_index); | |||||
const DataBuffer &data_buf = blobs[input_index]; | const DataBuffer &data_buf = blobs[input_index]; | ||||
auto mem_size = static_cast<uint64_t>(tensor_size); | auto mem_size = static_cast<uint64_t>(tensor_size); | ||||
GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, | GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, | ||||
@@ -247,7 +247,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy | |||||
data_buf.length, | data_buf.length, | ||||
mem_size); | mem_size); | ||||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]", | |||||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]", | |||||
model_->root_runtime_param_.graph_id, | model_->root_runtime_param_.graph_id, | ||||
input_index, | input_index, | ||||
args.inputs[input_index].GetData(), | args.inputs[input_index].GetData(), | ||||
@@ -174,6 +174,38 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel | |||||
compute_graph_info = context_->GetProfilingGraphDescInfo(); | compute_graph_info = context_->GetProfilingGraphDescInfo(); | ||||
context_->ClearProfilingGraphDescInfo(); | context_->ClearProfilingGraphDescInfo(); | ||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
for (auto &tmp_compute_graph_info : compute_graph_info) { | |||||
// default | |||||
if (op_desc->GetAllInputsSize() == 0) { | |||||
tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.input_shape = { {0} }; | |||||
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
if (input_desc == nullptr) { | |||||
continue; | |||||
} | |||||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
} | |||||
if (op_desc->GetOutputsSize() == 0) { | |||||
tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.output_shape = { {0} }; | |||||
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
} | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -939,7 +939,7 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); | ||||
return MEMALLOC_FAILED; | return MEMALLOC_FAILED; | ||||
} | } | ||||
GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size); | |||||
GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size); | |||||
std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | std::unique_ptr<TensorValue> tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), | ||||
tensor_size)); | tensor_size)); | ||||
@@ -1608,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons | |||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
NodePtr node_ptr = nullptr; | NodePtr node_ptr = nullptr; | ||||
vector<domi::TaskDef> task_def_list; | |||||
map<NodePtr, vector<domi::TaskDef>> node_task_map; | |||||
// create fp node | // create fp node | ||||
bool is_insert_fp_profiling_task = false; | bool is_insert_fp_profiling_task = false; | ||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); | ||||
if (is_insert_fp_profiling_task) { | if (is_insert_fp_profiling_task) { | ||||
vector<domi::TaskDef> task_def_list; | |||||
(void)GenerateFpProfilingTask(op_desc, task_def_list); | (void)GenerateFpProfilingTask(op_desc, task_def_list); | ||||
auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE); | auto fp_desc = MakeShared<OpDesc>(kProfilingFpNode, PROFILINGTRAININGTRACE); | ||||
GE_CHECK_NOTNULL(fp_desc); | GE_CHECK_NOTNULL(fp_desc); | ||||
fp_desc->SetOpKernelLibName(kEngineNameRts); | fp_desc->SetOpKernelLibName(kEngineNameRts); | ||||
node_ptr = compute_graph->AddNode(fp_desc); | node_ptr = compute_graph->AddNode(fp_desc); | ||||
GE_CHECK_NOTNULL(node_ptr); | |||||
node_task_map[node_ptr] = task_def_list; | |||||
GELOGD("Create fp profiling node success before."); | GELOGD("Create fp profiling node success before."); | ||||
} | } | ||||
// creat all reduce start node | // creat all reduce start node | ||||
@@ -1625,6 +1628,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons | |||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | ||||
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | ||||
if (is_all_reduce && is_insert_bp_profiling_task) { | if (is_all_reduce && is_insert_bp_profiling_task) { | ||||
vector<domi::TaskDef> task_def_list; | |||||
int64_t log_id = 0; | int64_t log_id = 0; | ||||
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | ||||
GELOGD("All reduce node profiling task log id: %ld before", log_id); | GELOGD("All reduce node profiling task log id: %ld before", log_id); | ||||
@@ -1634,18 +1638,24 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons | |||||
GE_CHECK_NOTNULL(ar_desc_start); | GE_CHECK_NOTNULL(ar_desc_start); | ||||
ar_desc_start->SetOpKernelLibName(kEngineNameRts); | ar_desc_start->SetOpKernelLibName(kEngineNameRts); | ||||
node_ptr = compute_graph->AddNode(ar_desc_start); | node_ptr = compute_graph->AddNode(ar_desc_start); | ||||
GE_CHECK_NOTNULL(node_ptr); | |||||
node_task_map[node_ptr] = task_def_list; | |||||
GELOGD("Create all reduce start profiling node success before."); | GELOGD("Create all reduce start profiling node success before."); | ||||
} | } | ||||
if (node_ptr != nullptr) { | |||||
for (const auto &task_def : task_def_list) { | |||||
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); | |||||
if (!node_task_map.empty()) { | |||||
for (const auto &node_task : node_task_map) { | |||||
NodePtr profiling_node = node_task.first; | |||||
vector<domi::TaskDef> task_def_lists = node_task.second; | |||||
for (const auto &task_def : task_def_lists) { | |||||
hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); | |||||
} | |||||
NodeItem *node_item = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item)); | |||||
node_item->input_start = 0; | |||||
node_item->output_start = 0; | |||||
graph_item.node_items_.emplace_back(node_item); | |||||
} | } | ||||
NodeItem *node_item = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); | |||||
node_item->input_start = 0; | |||||
node_item->output_start = 0; | |||||
graph_item.node_items_.emplace_back(node_item); | |||||
} else { | } else { | ||||
GELOGD("No need to create profiling node before."); | GELOGD("No need to create profiling node before."); | ||||
} | } | ||||
@@ -1661,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const | |||||
GE_CHECK_NOTNULL(compute_graph); | GE_CHECK_NOTNULL(compute_graph); | ||||
NodePtr node_ptr = nullptr; | NodePtr node_ptr = nullptr; | ||||
vector<domi::TaskDef> task_def_list; | |||||
map<NodePtr, vector<domi::TaskDef>> node_task_map; | |||||
// Create all reduce end node | // Create all reduce end node | ||||
bool is_insert_bp_profiling_task = false; | bool is_insert_bp_profiling_task = false; | ||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); | ||||
bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); | ||||
if (is_all_reduce && is_insert_bp_profiling_task) { | if (is_all_reduce && is_insert_bp_profiling_task) { | ||||
vector<domi::TaskDef> task_def_list; | |||||
int64_t log_id = 0; | int64_t log_id = 0; | ||||
(void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); | ||||
GELOGD("All reduce node profiling task log id: %ld after", log_id); | GELOGD("All reduce node profiling task log id: %ld after", log_id); | ||||
@@ -1676,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const | |||||
GE_CHECK_NOTNULL(ar_desc_end); | GE_CHECK_NOTNULL(ar_desc_end); | ||||
ar_desc_end->SetOpKernelLibName(kEngineNameRts); | ar_desc_end->SetOpKernelLibName(kEngineNameRts); | ||||
node_ptr = compute_graph->AddNode(ar_desc_end); | node_ptr = compute_graph->AddNode(ar_desc_end); | ||||
GE_CHECK_NOTNULL(node_ptr); | |||||
node_task_map[node_ptr] = task_def_list; | |||||
GELOGD("Create all reduce end profiling node success after."); | GELOGD("Create all reduce end profiling node success after."); | ||||
} | } | ||||
// create bp node | // create bp node | ||||
if (!is_all_reduce && is_insert_bp_profiling_task) { | if (!is_all_reduce && is_insert_bp_profiling_task) { | ||||
vector<domi::TaskDef> task_def_list; | |||||
(void) GenerateBpProfilingTask(op_desc, task_def_list); | (void) GenerateBpProfilingTask(op_desc, task_def_list); | ||||
auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE); | auto bp_op_desc = MakeShared<OpDesc>(kProfilingBpNode, PROFILINGTRAININGTRACE); | ||||
GE_CHECK_NOTNULL(bp_op_desc); | GE_CHECK_NOTNULL(bp_op_desc); | ||||
bp_op_desc->SetOpKernelLibName(kEngineNameRts); | bp_op_desc->SetOpKernelLibName(kEngineNameRts); | ||||
node_ptr = compute_graph->AddNode(bp_op_desc); | node_ptr = compute_graph->AddNode(bp_op_desc); | ||||
GE_CHECK_NOTNULL(node_ptr); | |||||
node_task_map[node_ptr] = task_def_list; | |||||
GELOGD("Create bp profiling node success after."); | GELOGD("Create bp profiling node success after."); | ||||
} | } | ||||
// create end node | // create end node | ||||
bool is_insert_end_profiling_task = false; | bool is_insert_end_profiling_task = false; | ||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task); | (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task); | ||||
if (is_insert_end_profiling_task) { | if (is_insert_end_profiling_task) { | ||||
vector<domi::TaskDef> task_def_list; | |||||
(void)GenerateEndProfilingTask(op_desc, task_def_list); | (void)GenerateEndProfilingTask(op_desc, task_def_list); | ||||
auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE); | auto end_desc = MakeShared<OpDesc>(kProfilingEndNode, PROFILINGTRAININGTRACE); | ||||
GE_CHECK_NOTNULL(end_desc); | GE_CHECK_NOTNULL(end_desc); | ||||
end_desc->SetOpKernelLibName(kEngineNameRts); | end_desc->SetOpKernelLibName(kEngineNameRts); | ||||
node_ptr = compute_graph->AddNode(end_desc); | node_ptr = compute_graph->AddNode(end_desc); | ||||
GE_CHECK_NOTNULL(node_ptr); | |||||
node_task_map[node_ptr] = task_def_list; | |||||
GELOGD("Create end profiling node success after."); | GELOGD("Create end profiling node success after."); | ||||
} | } | ||||
if (node_ptr != nullptr) { | |||||
for (const auto &task_def : task_def_list) { | |||||
hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); | |||||
if (!node_task_map.empty()) { | |||||
for (const auto &node_task : node_task_map) { | |||||
NodePtr profiling_node = node_task.first; | |||||
vector<domi::TaskDef> task_def_lists = node_task.second; | |||||
for (const auto &task_def : task_def_lists) { | |||||
hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); | |||||
} | |||||
NodeItem *node_item = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item)); | |||||
node_item->input_start = 0; | |||||
node_item->output_start = 0; | |||||
graph_item.node_items_.emplace_back(node_item); | |||||
} | } | ||||
NodeItem *node_item = nullptr; | |||||
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); | |||||
node_item->input_start = 0; | |||||
node_item->output_start = 0; | |||||
graph_item.node_items_.emplace_back(node_item); | |||||
} else { | } else { | ||||
GELOGD("No need to create profiling node after."); | GELOGD("No need to create profiling node after."); | ||||
} | } | ||||
@@ -29,8 +29,9 @@ constexpr int64_t kDimEndFlag = INT64_MIN; | |||||
Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | ||||
GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); | GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); | ||||
if (ext_info.empty()) { | if (ext_info.empty()) { | ||||
GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str()); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", | |||||
node_name_.c_str()); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
ext_info_len_ = ext_info.size(); | ext_info_len_ = ext_info.size(); | ||||
@@ -38,8 +39,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
GE_CHECK_NOTNULL(ext_info_); | GE_CHECK_NOTNULL(ext_info_); | ||||
if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) { | if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) { | ||||
GELOGE(FAILED, "[%s] Failed to coy ext info", node_name_.c_str()); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to coy ext info", node_name_.c_str()); | |||||
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
} | } | ||||
input_shape_and_type_.clear(); | input_shape_and_type_.clear(); | ||||
@@ -72,7 +73,7 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
offset += aicpu_ext_info->infoLen; | offset += aicpu_ext_info->infoLen; | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.", | "Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.", | ||||
node_name_.c_str(), offset, ext_info_len_); | node_name_.c_str(), offset, ext_info_len_); | ||||
GELOGI("Node[%s] parse ext info end.", node_name_.c_str()); | GELOGI("Node[%s] parse ext info end.", node_name_.c_str()); | ||||
@@ -80,13 +81,13 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||||
} | } | ||||
Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", | "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", | ||||
node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); | node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); | ||||
auto type = reinterpret_cast<const int32_t *>(aicpu_ext_info->infoMsg); | auto type = reinterpret_cast<const int32_t *>(aicpu_ext_info->infoMsg); | ||||
GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] parse ext shape type failed as need %d but %d.", | "Node[%s] parse ext shape type failed as need %d but %d.", | ||||
node_name_.c_str(), unknown_type_, *type); | node_name_.c_str(), unknown_type_, *type); | ||||
GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); | GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); | ||||
@@ -95,7 +96,7 @@ Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { | |||||
Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { | Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { | ||||
auto need_len = input_num_ * sizeof(AicpuShapeAndType); | auto need_len = input_num_ * sizeof(AicpuShapeAndType); | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] parse ext input shape failed as infoLen must be " | "Node[%s] parse ext input shape failed as infoLen must be " | ||||
"input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", | "input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", | ||||
node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); | node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); | ||||
@@ -116,7 +117,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
auto need_len = output_num_ * sizeof(AicpuShapeAndType); | auto need_len = output_num_ * sizeof(AicpuShapeAndType); | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] parse ext output shape failed as infoLen must be " | "Node[%s] parse ext output shape failed as infoLen must be " | ||||
"output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", | "output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", | ||||
node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); | node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); | ||||
@@ -130,7 +131,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { | |||||
} | } | ||||
Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), ACL_ERROR_GE_PARAM_INVALID, | |||||
"Node[%s] parse ext session info failed as infoLen must be %zu but %u.", | "Node[%s] parse ext session info failed as infoLen must be %zu but %u.", | ||||
node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen); | node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen); | ||||
@@ -173,7 +174,7 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const | |||||
} | } | ||||
Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) { | Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) { | ||||
GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR, | |||||
GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"Node[%s] is depend compute is no need update output shape and type by ext.", | "Node[%s] is depend compute is no need update output shape and type by ext.", | ||||
node_name_.c_str()); | node_name_.c_str()); | ||||
GE_CHECK_LE(output_index, output_num_); | GE_CHECK_LE(output_index, output_num_); | ||||
@@ -183,7 +184,7 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons | |||||
if (unknown_type_ == DEPEND_SHAPE_RANGE) { | if (unknown_type_ == DEPEND_SHAPE_RANGE) { | ||||
std::vector<std::pair<int64_t, int64_t>> range; | std::vector<std::pair<int64_t, int64_t>> range; | ||||
auto range_ret = output_desc.GetShapeRange(range); | auto range_ret = output_desc.GetShapeRange(range); | ||||
GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, | |||||
GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, ACL_ERROR_GE_INTERNAL_ERROR, | |||||
"Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", | "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", | ||||
node_name_.c_str(), range_ret); | node_name_.c_str(), range_ret); | ||||
for (size_t k = 0; k < range.size(); ++k) { | for (size_t k = 0; k < range.size(); ++k) { | ||||
@@ -210,9 +211,9 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da | |||||
AicpuShapeAndType *shape_and_type) { | AicpuShapeAndType *shape_and_type) { | ||||
auto dim_num = shape.GetDimNum(); | auto dim_num = shape.GetDimNum(); | ||||
if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { | if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { | ||||
GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", | |||||
dim_num, aicpu::FWKAdapter::kMaxShapeDims); | dim_num, aicpu::FWKAdapter::kMaxShapeDims); | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
size_t index = 0; | size_t index = 0; | ||||
for (; index < dim_num; ++index) { | for (; index < dim_num; ++index) { | ||||
@@ -126,6 +126,12 @@ Status KnownNodeTask::Init(TaskContext &context) { | |||||
auto dump_properties = context.GetDumpProperties(); | auto dump_properties = context.GetDumpProperties(); | ||||
if (dump_properties.IsDumpOpen()) { | if (dump_properties.IsDumpOpen()) { | ||||
davinci_model_->SetDumpProperties(dump_properties); | davinci_model_->SetDumpProperties(dump_properties); | ||||
void *global_step = nullptr; | |||||
TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); | |||||
if (varible_global_step != nullptr) { | |||||
global_step = varible_global_step->MutableData(); | |||||
} | |||||
davinci_model_->SetKnownShapeGlobalStep(global_step); | |||||
} | } | ||||
int32_t device_id = 0; | int32_t device_id = 0; | ||||
rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
@@ -117,11 +117,11 @@ Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executo | |||||
auto executor_type = ResolveExecutorType(node); | auto executor_type = ResolveExecutorType(node); | ||||
const auto it = executors_.find(executor_type); | const auto it = executors_.find(executor_type); | ||||
if (it == executors_.end()) { | if (it == executors_.end()) { | ||||
GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", executor_type); | |||||
GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", static_cast<int>(executor_type)); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), executor_type); | |||||
GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast<int>(executor_type)); | |||||
*executor = it->second.get(); | *executor = it->second.get(); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -165,7 +165,7 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { | |||||
TensorUtils::SetSize(output_tensor, output_mem_size); | TensorUtils::SetSize(output_tensor, output_mem_size); | ||||
GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast<uint32_t>(i), output_tensor), | GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast<uint32_t>(i), output_tensor), | ||||
"hccl update output size failed."); | "hccl update output size failed."); | ||||
GELOGD("%s output desc[%u], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, | |||||
GELOGD("%s output desc[%zu], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, | |||||
output_tensor.GetShape().GetDimNum(), output_mem_size); | output_tensor.GetShape().GetDimNum(), output_mem_size); | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -189,14 +189,14 @@ Status NodeExecutorManager::InitializeExecutors() { | |||||
GE_CHECK_NOTNULL(build_fn); | GE_CHECK_NOTNULL(build_fn); | ||||
auto executor = std::unique_ptr<NodeExecutor>(build_fn()); | auto executor = std::unique_ptr<NodeExecutor>(build_fn()); | ||||
if (executor == nullptr) { | if (executor == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", engine_type); | |||||
GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", static_cast<int>(engine_type)); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GELOGD("Executor of engine type = %d was created successfully", engine_type); | |||||
GELOGD("Executor of engine type = %d was created successfully", static_cast<int>(engine_type)); | |||||
auto ret = executor->Initialize(); | auto ret = executor->Initialize(); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", engine_type); | |||||
GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", static_cast<int>(engine_type)); | |||||
for (auto &executor_it : executors_) { | for (auto &executor_it : executors_) { | ||||
executor_it.second->Finalize(); | executor_it.second->Finalize(); | ||||
} | } | ||||
@@ -554,33 +554,6 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream | |||||
tmp_compute_graph_info.model_name = dynamic_model_name; | tmp_compute_graph_info.model_name = dynamic_model_name; | ||||
tmp_compute_graph_info.op_name = op_desc->GetName(); | tmp_compute_graph_info.op_name = op_desc->GetName(); | ||||
tmp_compute_graph_info.op_type = op_desc->GetType(); | tmp_compute_graph_info.op_type = op_desc->GetType(); | ||||
// default | |||||
if (op_desc->GetAllInputsSize() == 0) { | |||||
tmp_compute_graph_info.input_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.input_shape = { {0} }; | |||||
tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | |||||
GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); | |||||
if (input_desc == nullptr) { | |||||
continue; | |||||
} | |||||
tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); | |||||
tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); | |||||
tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); | |||||
} | |||||
if (op_desc->GetOutputsSize() == 0) { | |||||
tmp_compute_graph_info.output_format = { FORMAT_NULL }; | |||||
tmp_compute_graph_info.output_shape = { {0} }; | |||||
tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; | |||||
} | |||||
for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { | |||||
GeTensorDesc output_desc = op_desc->GetOutputDesc(j); | |||||
tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); | |||||
tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); | |||||
tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); | |||||
} | |||||
tmp_compute_graph_info.task_id = task_id; | tmp_compute_graph_info.task_id = task_id; | ||||
tmp_compute_graph_info.stream_id = stream_id; | tmp_compute_graph_info.stream_id = stream_id; | ||||
compute_graph_info.emplace_back(tmp_compute_graph_info); | compute_graph_info.emplace_back(tmp_compute_graph_info); | ||||
@@ -1007,7 +1007,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js | |||||
} else { | } else { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ErrorManager::GetInstance().ATCReportErrMessage("E10003", | ||||
{"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); | ||||
GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); | |||||
} | } | ||||
if (model.model_data != nullptr) { | if (model.model_data != nullptr) { | ||||
@@ -57,9 +57,10 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { | |||||
std::vector<TaskDescInfo> task_desc_info; | std::vector<TaskDescInfo> task_desc_info; | ||||
uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(rt_ret, "Get task_id and stream_id failed."); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
TaskDescInfo tmp_task_desc_info; | TaskDescInfo tmp_task_desc_info; | ||||
@@ -141,7 +141,7 @@ Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id) | |||||
auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); | ||||
if (rt_err != RT_ERROR_NONE) { | if (rt_err != RT_ERROR_NONE) { | ||||
GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast<int>(rt_err)); | ||||
return rt_err; | |||||
return RT_ERROR_TO_GE_STATUS(rt_err); | |||||
} | } | ||||
// use current context as resource key instead | // use current context as resource key instead | ||||
GELOGI("use context as resource key instead when default stream"); | GELOGI("use context as resource key instead when default stream"); | ||||
@@ -438,8 +438,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||||
auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | auto task_type = static_cast<rtModelTaskType_t>(task_def.type()); | ||||
if (task_type == RT_MODEL_TASK_KERNEL) { | if (task_type == RT_MODEL_TASK_KERNEL) { | ||||
if (single_op.op_task_ != nullptr) { | if (single_op.op_task_ != nullptr) { | ||||
GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); | |||||
return UNSUPPORTED; | |||||
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); | |||||
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||||
} | } | ||||
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); | ||||
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { | ||||
@@ -30,8 +30,8 @@ namespace ge { | |||||
auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), | auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), | ||||
kernel_def_.args().data(), kernel_def_.args().size()); | kernel_def_.args().data(), kernel_def_.args().size()); | ||||
if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "memcpy failed, ret: %d", sec_ret); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
} | } | ||||
auto io_addr_val = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(io_addr)); | auto io_addr_val = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(io_addr)); | ||||
@@ -46,7 +46,7 @@ namespace ge { | |||||
auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret); | GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret); | ||||
return rt_ret; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, | rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, | ||||
@@ -54,7 +54,7 @@ namespace ge { | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
(void)rtFree(fwk_op_args); | (void)rtFree(fwk_op_args); | ||||
GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret); | GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret); | ||||
return rt_ret; | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
*args = fwk_op_args; | *args = fwk_op_args; | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -96,7 +96,7 @@ namespace ge { | |||||
// get kernel_ext_info | // get kernel_ext_info | ||||
auto &kernel_ext_info = kernel_def_.kernel_ext_info(); | auto &kernel_ext_info = kernel_def_.kernel_ext_info(); | ||||
auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); | auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); | ||||
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, | |||||
GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, ACL_ERROR_GE_PARAM_INVALID, | |||||
"task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | ||||
kernel_ext_info.size(), kernel_ext_info_size); | kernel_ext_info.size(), kernel_ext_info_size); | ||||
GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed."); | GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed."); | ||||
@@ -45,7 +45,7 @@ void FreeHbm(void *var) { | |||||
Status OpTask::OpenDump(rtStream_t stream) { | Status OpTask::OpenDump(rtStream_t stream) { | ||||
if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { | if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { | ||||
GELOGI("Dump is open in single op,start to set dump info"); | |||||
GELOGI("Dump is open in single op, start to set dump info"); | |||||
std::vector<uint64_t> input_addrs; | std::vector<uint64_t> input_addrs; | ||||
std::vector<uint64_t> output_adds; | std::vector<uint64_t> output_adds; | ||||
auto input_size = op_desc_->GetInputsSize(); | auto input_size = op_desc_->GetInputsSize(); | ||||
@@ -54,10 +54,10 @@ Status OpTask::OpenDump(rtStream_t stream) { | |||||
size_t arg_num = 0; | size_t arg_num = 0; | ||||
GetIoAddr(arg_base, arg_num); | GetIoAddr(arg_base, arg_num); | ||||
if (arg_num < input_size + output_size) { | if (arg_num < input_size + output_size) { | ||||
GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", | |||||
arg_num, | arg_num, | ||||
input_size + output_size); | input_size + output_size); | ||||
return FAILED; | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
for (size_t i = 0; i < input_size; i++) { | for (size_t i = 0; i < input_size; i++) { | ||||
@@ -120,11 +120,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_works | |||||
size_t arg_num = 0; | size_t arg_num = 0; | ||||
GetIoAddr(arg_base, arg_num); | GetIoAddr(arg_base, arg_num); | ||||
if (arg_num < all_addresses.size()) { | if (arg_num < all_addresses.size()) { | ||||
GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", | |||||
op_desc_->GetName().c_str(), | op_desc_->GetName().c_str(), | ||||
all_addresses.size(), | all_addresses.size(), | ||||
arg_num); | arg_num); | ||||
return INTERNAL_ERROR; | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
for (void *addr : all_addresses) { | for (void *addr : all_addresses) { | ||||
@@ -178,8 +178,8 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { | |||||
} | } | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); | |||||
return RT_FAILED; | |||||
GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); | GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); | ||||
auto status = OpenDump(stream); | auto status = OpenDump(stream); | ||||
@@ -199,8 +199,8 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve | |||||
run_info.block_dim = 0; | run_info.block_dim = 0; | ||||
auto ret = optiling::OpParaCalculate(*node_, run_info); | auto ret = optiling::OpParaCalculate(*node_, run_info); | ||||
if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to invoke OpParaCalculate. ret = %u", ret); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
block_dim_ = run_info.block_dim; | block_dim_ = run_info.block_dim; | ||||
tiling_data_ = run_info.tiling_data.str(); | tiling_data_ = run_info.tiling_data.str(); | ||||
@@ -223,8 +223,8 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc | |||||
} else { | } else { | ||||
std::vector<int64_t> storage_shape; | std::vector<int64_t> storage_shape; | ||||
if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { | if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { | ||||
GELOGE(PARAM_INVALID, "Failed to get storage_shape while storage_format was set"); | |||||
return PARAM_INVALID; | |||||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to get storage_shape while storage_format was set"); | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | } | ||||
GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", | GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", | ||||
@@ -273,7 +273,9 @@ Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) { | |||||
std::vector<int64_t> ws_offsets; | std::vector<int64_t> ws_offsets; | ||||
for (auto ws_size : workspace_sizes) { | for (auto ws_size : workspace_sizes) { | ||||
// alignment and padding should be done in OpParaCalculate | // alignment and padding should be done in OpParaCalculate | ||||
GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); | |||||
if (CheckInt64AddOverflow(total_size, ws_size) != SUCCESS) { | |||||
return ACL_ERROR_GE_INTERNAL_ERROR; | |||||
} | |||||
ws_offsets.emplace_back(total_size); | ws_offsets.emplace_back(total_size); | ||||
total_size += ws_size; | total_size += ws_size; | ||||
} | } | ||||
@@ -321,8 +323,9 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | |||||
} | } | ||||
if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { | ||||
GELOGE(INTERNAL_ERROR, "[%s] Failed to update kernel args.", node_->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", | |||||
node_->GetName().c_str()); | |||||
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
} | } | ||||
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); | ||||
@@ -360,7 +363,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint | |||||
num_inputs_, | num_inputs_, | ||||
num_outputs_, | num_outputs_, | ||||
unknown_type_)); | unknown_type_)); | ||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); | |||||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); | |||||
Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -418,7 +421,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector<GeTensorDesc> &input_desc, | |||||
"Input[%zu] update input shape failed.", input_index); | "Input[%zu] update input shape failed.", input_index); | ||||
continue; | continue; | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, | |||||
"Input_desc size is %zu, but get non_const_index is %zu", | "Input_desc size is %zu, but get non_const_index is %zu", | ||||
input_desc.size(), non_const_index); | input_desc.size(), non_const_index); | ||||
GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), | GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), | ||||
@@ -511,7 +514,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector<DataBuffer> &inputs, const vecto | |||||
arg_base++; | arg_base++; | ||||
continue; | continue; | ||||
} | } | ||||
GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), PARAM_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), ACL_ERROR_GE_PARAM_INVALID, | |||||
"Input size is %zu, but get non_const_index is %zu", | "Input size is %zu, but get non_const_index is %zu", | ||||
inputs.size(), non_const_index); | inputs.size(), non_const_index); | ||||
auto addr = inputs[non_const_index].data; | auto addr = inputs[non_const_index].data; | ||||
@@ -561,15 +564,15 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { | |||||
RT_MEMCPY_HOST_TO_DEVICE_EX, | RT_MEMCPY_HOST_TO_DEVICE_EX, | ||||
stream); | stream); | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); | |||||
return RT_FAILED; | |||||
GELOGE(ret, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str()); | GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str()); | ||||
ret = rtKernelLaunchEx(args_, arg_size_, 0, stream); | ret = rtKernelLaunchEx(args_, arg_size_, 0, stream); | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); | |||||
return RT_FAILED; | |||||
GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | ||||
@@ -747,9 +750,9 @@ Status AiCpuTask::InitForSummaryAndCopy() { | |||||
Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { | Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { | ||||
if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { | if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { | ||||
GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", | |||||
sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); | sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); | ||||
return PARAM_INVALID; | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); | GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); | ||||
GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), | GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), | ||||
@@ -759,8 +762,8 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { | |||||
auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), | auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), | ||||
kernel_def.args().data(), kernel_def.args().size()); | kernel_def.args().data(), kernel_def.args().size()); | ||||
if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; | |||||
} | } | ||||
aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_); | aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast<uintptr_t>(copy_ioaddr_dev_); | ||||
@@ -844,7 +847,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { | |||||
sm_desc, stream, dump_flag_); | sm_desc, stream, dump_flag_); | ||||
if (ret != RT_ERROR_NONE) { | if (ret != RT_ERROR_NONE) { | ||||
GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); | ||||
return ret; | |||||
return RT_ERROR_TO_GE_STATUS(ret); | |||||
} | } | ||||
GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); | ||||
GELOGD("Invoke rtCpuKernelLaunch succeeded"); | GELOGD("Invoke rtCpuKernelLaunch succeeded"); | ||||
@@ -242,7 +242,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); | ||||
if (rtRet != RT_ERROR_NONE) { | if (rtRet != RT_ERROR_NONE) { | ||||
GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet)); | GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast<int>(rtRet)); | ||||
return rtRet; | |||||
return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
} | } | ||||
const domi::KernelContext &context = kernel_def_.context(); | const domi::KernelContext &context = kernel_def_.context(); | ||||
@@ -261,7 +261,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & | |||||
rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); | ||||
if (rtRet != RT_ERROR_NONE) { | if (rtRet != RT_ERROR_NONE) { | ||||
GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet)); | GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast<int>(rtRet)); | ||||
return rtRet; | |||||
return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
} | } | ||||
} | } | ||||
@@ -287,7 +287,7 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ | |||||
auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); | ||||
if (rtRet != SUCCESS) { | if (rtRet != SUCCESS) { | ||||
GELOGE(rtRet, "rtGetFunctionByName failed."); | GELOGE(rtRet, "rtGetFunctionByName failed."); | ||||
return rtRet; | |||||
return RT_ERROR_TO_GE_STATUS(rtRet); | |||||
} | } | ||||
task.SetStubFunc(stub_name_, stub_func); | task.SetStubFunc(stub_name_, stub_func); | ||||
@@ -109,8 +109,13 @@ GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); | ||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); | ||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); | ||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, "Format is invalid when transferring shape."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Shape is invalid when transferring shape."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Datatype is invalid when transferring shape."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); | ||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); | |||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); | ||||
GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); | GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); | ||||
@@ -38,7 +38,12 @@ static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; | |||||
static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; | ||||
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; | ||||
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; | ||||
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; | |||||
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; | |||||
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; | |||||
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; | |||||
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; | ||||
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; | |||||
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; | ||||
static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; | ||||
static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; | ||||
@@ -49,6 +54,7 @@ static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; | |||||
static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; | ||||
static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; | ||||
static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} // namespace ge | } // namespace ge | ||||
#endif | #endif | ||||
@@ -38,75 +38,53 @@ extern "C" { | |||||
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | ||||
class GeLog { | class GeLog { | ||||
public: | |||||
public: | |||||
static uint64_t GetTid() { | |||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
static pid_t GetTid() { | |||||
thread_local static pid_t tid = syscall(__NR_gettid); | |||||
return tid; | |||||
} | |||||
thread_local static uint64_t tid = static_cast<uint64_t>(syscall(__NR_gettid)); | |||||
#else | #else | ||||
static int GetTid() { | |||||
thread_local static int tid = static_cast<int>(GetCurrentThreadId()); | |||||
return tid; | |||||
} | |||||
thread_local static uint64_t tid = static_cast<uint64_t>(GetCurrentThreadId()); | |||||
#endif | #endif | ||||
return tid; | |||||
} | |||||
}; | }; | ||||
inline bool IsLogEnable(int module_name, int log_level) { | inline bool IsLogEnable(int module_name, int log_level) { | ||||
int32_t enable = CheckLogLevel(module_name, log_level); | int32_t enable = CheckLogLevel(module_name, log_level); | ||||
// 1:enable, 0:disable | // 1:enable, 0:disable | ||||
if (enable == 1) { | |||||
return true; | |||||
} | |||||
return false; | |||||
return (enable == 1); | |||||
} | } | ||||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||||
#define GELOGE(ERROR_CODE, fmt, ...) \ | |||||
dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | ||||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | ||||
#define GELOGW(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGI(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGD(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGW(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ | |||||
dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGI(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ | |||||
dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGD(fmt, ...) \ | |||||
if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ | |||||
dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | #define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | ||||
#define GELOGO(fmt, ...) \ | |||||
Dlog(GE_MODULE_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GELOGT(VALUE, fmt, ...) \ | |||||
do { \ | |||||
TraceStatus stat = VALUE; \ | |||||
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||||
int idx = static_cast<int>(stat); \ | |||||
char *k = const_cast<char *>("status"); \ | |||||
char *v = const_cast<char *>(TraceStatStr[idx]); \ | |||||
KeyValue kv = {k, v}; \ | |||||
DlogWithKV(static_cast<int>(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ | |||||
#define GELOGT(VALUE, fmt, ...) \ | |||||
do { \ | |||||
TraceStatus stat = VALUE; \ | |||||
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||||
int idx = static_cast<int>(stat); \ | |||||
char *k = const_cast<char *>("status"); \ | |||||
char *v = const_cast<char *>(TraceStatStr[idx]); \ | |||||
KeyValue kv = {k, v}; \ | |||||
DlogWithKV(static_cast<int>(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ | |||||
##__VA_ARGS__); \ | |||||
} while (0) | } while (0) | ||||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||||
#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ | |||||
dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ | ||||
((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) | ||||
#define GE_LOG_WARN(MOD_NAME, fmt, ...) \ | |||||
if (IsLogEnable(MOD_NAME, DLOG_WARN)) dlog_warn(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GE_LOG_INFO(MOD_NAME, fmt, ...) \ | |||||
if (IsLogEnable(MOD_NAME, DLOG_INFO)) dlog_info(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GE_LOG_DEBUG(MOD_NAME, fmt, ...) \ | |||||
if (IsLogEnable(MOD_NAME, DLOG_DEBUG)) dlog_debug(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GE_LOG_EVENT(MOD_NAME, fmt, ...) dlog_event(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GE_LOG_OPLOG(MOD_NAME, fmt, ...) \ | |||||
Dlog(MOD_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) | |||||
#define GE_LOG_TRACE(MOD_NAME, value, fmt, ...) \ | |||||
do { \ | |||||
TraceStatus stat = value; \ | |||||
const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ | |||||
int idx = static_cast<int>(stat); \ | |||||
char *k = const_cast<char *>("status"); \ | |||||
char *v = const_cast<char *>(TraceStatStr[idx]); \ | |||||
KeyValue kv = {k, v}; \ | |||||
DlogWithKV(static_cast<int>(MOD_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ | |||||
} while (0) | |||||
// print memory when it is greater than 1KB. | // print memory when it is greater than 1KB. | ||||
#define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ | ||||
@@ -1 +1 @@ | |||||
Subproject commit bb86412204fc72fa8fe4063e6044090dfd714321 | |||||
Subproject commit 8ab60be2870b80b1ec952bb21c7f05ae2a624984 |
@@ -1 +1 @@ | |||||
Subproject commit d85b5fc685b9e1f8dbee778c9c7b3ab6f379af79 | |||||
Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b |
@@ -683,7 +683,7 @@ set(MULTI_PARTS_TEST_FILES | |||||
"common/format_transfer_nchw_fractalz_unittest.cc" | "common/format_transfer_nchw_fractalz_unittest.cc" | ||||
"common/format_transfer_hwcn_fractalz_unittest.cc" | "common/format_transfer_hwcn_fractalz_unittest.cc" | ||||
"common/format_transfer_nhwc_fractalz_unittest.cc" | "common/format_transfer_nhwc_fractalz_unittest.cc" | ||||
#"common/format_transfer_fractal_nz_unittest.cc" | |||||
"common/format_transfer_fractal_nz_unittest.cc" | |||||
"common/format_transfer_fractal_zz_unittest.cc" | "common/format_transfer_fractal_zz_unittest.cc" | ||||
"common/format_transfer_nhwc_5d_unittest.cc" | "common/format_transfer_nhwc_5d_unittest.cc" | ||||
"common/format_transfer_5d_nchw_unittest.cc" | "common/format_transfer_5d_nchw_unittest.cc" | ||||
@@ -679,7 +679,7 @@ TEST_F(UtestFormatTransfer5dNhwc, nc1hwc0_to_nhwc_float2) { | |||||
} | } | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransfer5dNhwc, invalid_src_format) { | TEST_F(UtestFormatTransfer5dNhwc, invalid_src_format) { | ||||
@@ -158,7 +158,7 @@ TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_fp16_success_lt_cube) { | |||||
} | } | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_gp16_success_eq_cube) { | TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_gp16_success_eq_cube) { | ||||
@@ -249,8 +249,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape1_uint8_3) { | |||||
} | } | ||||
*/ | */ | ||||
TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) { | |||||
/*TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) { | |||||
uint8_t data[32 * 32] = { | uint8_t data[32 * 32] = { | ||||
47, 78, 47, 180, 246, 76, 157, 127, 63, 0, 168, 23, 148, 198, 180, 190, 43, 187, 76, 67, 77, 246, 11, | 47, 78, 47, 180, 246, 76, 157, 127, 63, 0, 168, 23, 148, 198, 180, 190, 43, 187, 76, 67, 77, 246, 11, | ||||
149, 240, 236, 136, 123, 51, 95, 7, 163, 163, 64, 157, 230, 247, 122, 67, 106, 150, 20, 231, 118, 43, 208, | 149, 240, 236, 136, 123, 51, 95, 7, 163, 163, 64, 157, 230, 247, 122, 67, 106, 150, 20, 231, 118, 43, 208, | ||||
@@ -2157,7 +2156,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape3_fp16) { | |||||
for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { | for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { | ||||
EXPECT_EQ((reinterpret_cast<uint16_t *>(result2.data.get()))[i], data[i]); | EXPECT_EQ((reinterpret_cast<uint16_t *>(result2.data.get()))[i], data[i]); | ||||
} | } | ||||
} | |||||
}*/ | |||||
TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) { | TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) { | ||||
uint16_t data[2 * 2 * 17 * 4] = { | uint16_t data[2 * 2 * 17 * 4] = { | ||||
@@ -2333,7 +2332,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) { | |||||
} | } | ||||
EXPECT_EQ( | EXPECT_EQ( | ||||
transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | ||||
UNSUPPORTED); | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, nd_shape5_fp16) { | TEST_F(UtestFormatTransferNdFractNz, nd_shape5_fp16) { | ||||
@@ -4785,6 +4784,8 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp32) { | |||||
for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { | for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { | ||||
EXPECT_EQ((reinterpret_cast<float *>(result2.data.get()))[i], data[i]); | EXPECT_EQ((reinterpret_cast<float *>(result2.data.get()))[i], data[i]); | ||||
} | } | ||||
EXPECT_EQ(transfer2.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, nchw_shape4_fp32) { | TEST_F(UtestFormatTransferNdFractNz, nchw_shape4_fp32) { | ||||
@@ -9059,7 +9060,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_shape) { | |||||
FormatTransferFractalNz transfer; | FormatTransferFractalNz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) { | TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) { | ||||
@@ -9079,7 +9080,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) { | |||||
FormatTransferFractalNz transfer; | FormatTransferFractalNz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) { | TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) { | ||||
@@ -9094,8 +9095,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) { | |||||
FormatTransferFractalNz transfer; | FormatTransferFractalNz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
EXPECT_EQ(TransFormat(args, result), UNSUPPORTED); | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, invalid_dst_shape) { | TEST_F(UtestFormatTransferNdFractNz, invalid_dst_shape) { | ||||
@@ -9136,6 +9136,24 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type2) { | |||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) { | |||||
uint16_t data[1 * 1 * 1 * 16 * 16] = {0}; | |||||
TransArgs args{reinterpret_cast<uint8_t *>(data), | |||||
FORMAT_FRACTAL_NZ, | |||||
FORMAT_NHWC, | |||||
{1, 1, 1, 16, 16}, | |||||
{ | |||||
1, | |||||
1, | |||||
4, | |||||
4, | |||||
}, | |||||
DT_VARIANT}; | |||||
TransResult result; | |||||
FormatTransferFractalNzND transfer; | |||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | |||||
} | |||||
TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) { | TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) { | ||||
uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0}; | uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0}; | ||||
TransArgs args{reinterpret_cast<uint8_t *>(data), | TransArgs args{reinterpret_cast<uint8_t *>(data), | ||||
@@ -1894,7 +1894,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16_1) { | |||||
} | } | ||||
EXPECT_EQ( | EXPECT_EQ( | ||||
transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | ||||
UNSUPPORTED); | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) { | TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) { | ||||
@@ -2071,7 +2071,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) { | |||||
} | } | ||||
EXPECT_EQ( | EXPECT_EQ( | ||||
transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | ||||
UNSUPPORTED); | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractZz, nd_shape5_fp16) { | TEST_F(UtestFormatTransferNdFractZz, nd_shape5_fp16) { | ||||
@@ -7879,7 +7879,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_shape) { | |||||
FormatTransferFractalZz transfer; | FormatTransferFractalZz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) { | TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) { | ||||
@@ -7899,7 +7899,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) { | |||||
FormatTransferFractalZz transfer; | FormatTransferFractalZz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) { | TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) { | ||||
@@ -7914,7 +7914,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) { | |||||
FormatTransferFractalZz transfer; | FormatTransferFractalZz transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | ||||
PARAM_INVALID); | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
EXPECT_EQ(TransFormat(args, result), UNSUPPORTED); | EXPECT_EQ(TransFormat(args, result), UNSUPPORTED); | ||||
} | } | ||||
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_eq_cube) { | |||||
} | } | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_gt_cube) { | TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_gt_cube) { | ||||
@@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_eq_cube) { | |||||
} | } | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_gt_cube) { | TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_gt_cube) { | ||||
@@ -75,7 +75,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_format_nchw) { | |||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format_nc1khkwhwc0) { | TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format_nc1khkwhwc0) { | ||||
@@ -142,7 +142,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_shape3) { | |||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, PARAM_INVALID); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format) { | TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format) { | ||||
@@ -633,5 +633,14 @@ TEST_F(UtestFormatTransferNchw5d, unsupport_dst_format) { | |||||
TransResult result; | TransResult result; | ||||
EXPECT_NE(transfer.TransFormat(args, result), SUCCESS); | EXPECT_NE(transfer.TransFormat(args, result), SUCCESS); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNchw5d, invalid_data_format) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferNchwNc1hwc0 transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | |||||
} // namespace formats | } // namespace formats | ||||
} // namespace ge | } // namespace ge |
@@ -719,7 +719,7 @@ TEST_F(UtestFormatTransferNhwc5d, invalid_src_format) { | |||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
Status status = | Status status = | ||||
transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); | ||||
EXPECT_EQ(status, UNSUPPORTED); | |||||
EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | } | ||||
TEST_F(UtestFormatTransferNhwc5d, invalid_dst_shape2) { | TEST_F(UtestFormatTransferNhwc5d, invalid_dst_shape2) { | ||||
@@ -751,5 +751,20 @@ TEST_F(UtestFormatTransferNhwc5d, unsupport_dst_format) { | |||||
FormatTransferNhwcNc1hwc0 transfer; | FormatTransferNhwcNc1hwc0 transfer; | ||||
EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNhwc5d, invalid_data_shape) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferNhwcNc1hwc0 transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
TransArgs args2{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_STRING}; | |||||
FormatTransferNhwcNc1hwc0 transfer2; | |||||
EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); | |||||
} | |||||
} // namespace formats | } // namespace formats | ||||
} // namespace ge | } // namespace ge |
@@ -5353,5 +5353,44 @@ TEST_F(UtestFormatTransferNhwcFz, build_transfer_uint8) { | |||||
auto transfer = BuildFormatTransfer(args); | auto transfer = BuildFormatTransfer(args); | ||||
EXPECT_NE(transfer, nullptr); | EXPECT_NE(transfer, nullptr); | ||||
} | } | ||||
TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT}; | |||||
FormatTransferFractalZ transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); | |||||
} | |||||
TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_CHWN, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferFractalZ transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | |||||
TEST_F(UtestFormatTransferNhwcFz, invalid_data_shape) { | |||||
uint16_t data[1 * 4 * 4 * 1] = {0}; | |||||
TransArgs args{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferFractalZ transfer; | |||||
EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
TransArgs args2{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_HWCN, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferFractalZ transfer2; | |||||
EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
TransArgs args3{ | |||||
reinterpret_cast<uint8_t *>(data), FORMAT_NCHW, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; | |||||
FormatTransferFractalZ transfer3; | |||||
EXPECT_EQ(transfer3.TransShape(args3.src_format, args3.src_shape, args3.src_data_type, args3.dst_format, args3.dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | |||||
} // namespace formats | } // namespace formats | ||||
} // namespace ge | } // namespace ge |
@@ -4654,5 +4654,27 @@ TEST_F(UtestFormatTranspose, chwn_to_hwcn2) { | |||||
EXPECT_EQ((reinterpret_cast<uint16_t *>(result.data.get()))[i], ret[i]); | EXPECT_EQ((reinterpret_cast<uint16_t *>(result.data.get()))[i], ret[i]); | ||||
} | } | ||||
} | } | ||||
TEST_F(UtestFormatTranspose, invalid_data_shape) { | |||||
FormatTransferTranspose transfer; | |||||
std::vector<int64_t> dst_shape; | |||||
EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, std::vector<int64_t>({}), DT_FLOAT16, FORMAT_HWCN, dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); | |||||
} | |||||
TEST_F(UtestFormatTranspose, invalid_src_format) { | |||||
FormatTransferTranspose transfer; | |||||
std::vector<int64_t> dst_shape; | |||||
EXPECT_EQ(transfer.TransShape(FORMAT_NC1HWC0, std::vector<int64_t>({1, 3, 8, 8}), DT_FLOAT16, FORMAT_HWCN, dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | |||||
TEST_F(UtestFormatTranspose, invalid_dst_format) { | |||||
FormatTransferTranspose transfer; | |||||
std::vector<int64_t> dst_shape; | |||||
std::vector<int64_t> src_shape; | |||||
EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, src_shape, DT_FLOAT16, FORMAT_C1HWNC0, dst_shape), | |||||
ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); | |||||
} | |||||
} // namespace formats | } // namespace formats | ||||
} // namespace ge | } // namespace ge |
@@ -46,7 +46,7 @@ class UtestDavinciModel : public testing::Test { | |||||
} | } | ||||
}; | }; | ||||
TEST_F(UtestDavinciModel, init_success) { | |||||
/*TEST_F(UtestDavinciModel, init_success) { | |||||
DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||
ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ComputeGraphPtr graph = make_shared<ComputeGraph>("default"); | ||||
ProfilingManager::Instance().is_load_profiling_ = true; | ProfilingManager::Instance().is_load_profiling_ = true; | ||||
@@ -130,7 +130,7 @@ TEST_F(UtestDavinciModel, init_success) { | |||||
EXPECT_EQ(outputs.size(), 1); | EXPECT_EQ(outputs.size(), 1); | ||||
ProfilingManager::Instance().is_load_profiling_ = false; | ProfilingManager::Instance().is_load_profiling_ = false; | ||||
} | |||||
}*/ | |||||
TEST_F(UtestDavinciModel, init_data_op) { | TEST_F(UtestDavinciModel, init_data_op) { | ||||
DavinciModel model(0, nullptr); | DavinciModel model(0, nullptr); | ||||