diff --git a/build.sh b/build.sh index a111f387..f2fafd48 100644 --- a/build.sh +++ b/build.sh @@ -240,7 +240,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then rm -rf ${BASEPATH}/cov mkdir ${BASEPATH}/cov lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info - lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '*/ge/common/*' '*/ge/executor/*' '*/ge/graph/*' '*/ge/host_kernels/*' '/usr/local/*' -o cov/coverage.info + lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info cd ${BASEPATH}/cov genhtml coverage.info fi diff --git a/ge/common/dump/dump_op.cc b/ge/common/dump/dump_op.cc index 0b9e9dcc..5c768e22 100755 --- a/ge/common/dump/dump_op.cc +++ b/ge/common/dump/dump_op.cc @@ -99,8 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { } int64_t output_size = 0; if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { - GELOGE(PARAM_INVALID, "Get output size filed"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed"); + return ACL_ERROR_GE_INTERNAL_ERROR; } GELOGD("Get output size in lanch dump op is %ld", output_size); output.set_size(output_size); @@ -126,8 +126,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { } int64_t input_size = 0; if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { - GELOGE(PARAM_INVALID, "Get output size filed"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed"); + return ACL_ERROR_GE_INTERNAL_ERROR; } GELOGD("Get input size in lanch dump op is %ld", input_size); input.set_size(input_size); @@ -151,31 +151,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { size_t proto_size = op_mapping_info.ByteSizeLong(); bool ret = op_mapping_info.SerializeToString(&proto_msg); if (!ret || proto_size == 0) { - GELOGE(FAILED, "Protobuf serialize failed,proto_size is %zu", proto_size); - return FAILED; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size); + return ACL_ERROR_GE_INTERNAL_ERROR; } rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; + GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); - return RT_FAILED; + GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); - return RT_FAILED; + GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtMemcpy failed, ret: 0x%X", rt_ret); - return RT_FAILED; + GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } constexpr int32_t io_addr_num = 2; @@ -193,8 +193,8 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) { nullptr, // no need smDesc stream_); if (rt_ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); - return rt_ret; + GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret); + return RT_ERROR_TO_GE_STATUS(rt_ret); } GELOGI("Kernel launch dump op success"); return SUCCESS; @@ -204,9 +204,15 @@ Status DumpOp::LaunchDumpOp() { GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str()); int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); - if (rt_ret != RT_ERROR_NONE || device_id < 0) { - GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); - return RT_FAILED; + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); + return RT_ERROR_TO_GE_STATUS(rt_ret); + } + if (device_id < 0) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, + "Check device_id failed, device_id = %d, which should be not less than 0.", + device_id); + return ACL_ERROR_GE_INTERNAL_ERROR; } aicpu::dump::OpMappingInfo op_mapping_info; auto dump_path = dump_properties_.GetDumpPath() + std::to_string(device_id) + "/"; @@ -232,29 +238,31 @@ Status DumpOp::LaunchDumpOp() { task.mutable_op()->set_op_name(op_desc_->GetName()); task.mutable_op()->set_op_type(op_desc_->GetType()); if (dump_properties_.GetDumpMode() == kDumpOutput) { - if (DumpOutput(task) != SUCCESS) { - GELOGE(FAILED, "Dump output failed"); - return FAILED; + auto ret = DumpOutput(task); + if (ret != SUCCESS) { + GELOGE(ret, "Dump output failed"); + return ret; } op_mapping_info.mutable_task()->Add(std::move(task)); } if (dump_properties_.GetDumpMode() == kDumpInput) { - if (DumpInput(task) != SUCCESS) { - GELOGE(FAILED, "Dump input failed"); - return FAILED; + auto ret = DumpInput(task); + if (ret != SUCCESS) { + GELOGE(ret, "Dump input failed"); + return ret; } op_mapping_info.mutable_task()->Add(std::move(task)); } if (dump_properties_.GetDumpMode() == kDumpAll) { auto ret = DumpOutput(task); if (ret != SUCCESS) { - GELOGE(FAILED, "Dump output failed when in dumping all"); - return FAILED; + GELOGE(ret, "Dump output failed when in dumping all"); + return ret; } ret = DumpInput(task); if (ret != SUCCESS) { - GELOGE(FAILED, "Dump input failed when in dumping all"); - return FAILED; + GELOGE(ret, "Dump input failed when in dumping all"); + return ret; } op_mapping_info.mutable_task()->Add(std::move(task)); } diff --git a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc index 85f4038e..0cb581d7 100644 --- a/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc +++ b/ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc @@ -162,7 +162,7 @@ Status FormatTransferC1hwncoc0Hwcn::TransFormat(const TransArgs &args, TransResu Status FormatTransferC1hwncoc0Hwcn::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { GELOGD("The shape derivation from C1HWNCoC0 to HWCN is not unique. Trans shape in this direction is not supported"); - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferC1hwncoc0Hwcn, FORMAT_C1HWNCoC0, FORMAT_HWCN) diff --git a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc index 79af84f7..eaa19d7d 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat std::vector &dst_shape) { auto c0 = GetCubeSizeByDataType(data_type); if (c0 < 0) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } auto c1 = Ceil(c, c0); @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat Status TransShapeDhwckToFz3D(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kDhwcnDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto d = src_shape.at(kDhwcnD); auto h = src_shape.at(kDhwcnH); @@ -163,14 +163,14 @@ Status FormatTransferDhwcnFractalZ3D::TransShape(Format src_format, const std::v DataType data_type, Format dst_format, std::vector &dst_shape) { if (CheckDataTypeSupport(data_type) != SUCCESS) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } if (src_format == FORMAT_DHWCN && dst_format == FORMAT_FRACTAL_Z_3D) { return TransShapeDhwckToFz3D(src_shape, data_type, dst_shape); } - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferDhwcnFractalZ3D, FORMAT_DHWCN, FORMAT_FRACTAL_Z_3D) diff --git a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc index cd1e0607..3a18312a 100644 --- a/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc @@ -32,7 +32,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat std::vector &dst_shape) { auto c0 = GetCubeSizeByDataType(data_type); if (c0 < 0) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } auto c1 = Ceil(c, c0); @@ -50,7 +50,7 @@ Status TransShapeToFz(int64_t d, int64_t n, int64_t c, int64_t h, int64_t w, Dat Status TransShapeDhwncToFz3DTranspose(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kDhwncDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto d = src_shape.at(kDhwncD); auto h = src_shape.at(kDhwncH); @@ -164,14 +164,14 @@ Status FormatTransferDhwncFractalZ3DTranspose::TransShape(Format src_format, con DataType data_type, Format dst_format, std::vector &dst_shape) { if (CheckDataTypeSupport(data_type) != SUCCESS) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } if (src_format == FORMAT_DHWNC && dst_format == FORMAT_FRACTAL_Z_3D_TRANSPOSE) { return TransShapeDhwncToFz3DTranspose(src_shape, data_type, dst_shape); } - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferDhwncFractalZ3DTranspose, FORMAT_DHWNC, FORMAT_FRACTAL_Z_3D_TRANSPOSE) diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc index cb528453..c3b288c1 100755 --- a/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_nz.cc @@ -87,8 +87,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap hw_shape.push_back(DIM_DEFAULT_VALUE); hw_shape.push_back(src_shape[kNdDimIndexN]); if (!IsShapeValid(dst_shape)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; default: @@ -106,8 +106,8 @@ Status TransShapeToFracNz(const ShapeVector &src_shape, DataType data_type, Shap hw_shape.push_back(src_shape[size - kNdDimCountBackwardsWH]); hw_shape.push_back(src_shape[size - kNdDimCountBackwardsW]); if (!IsShapeValid(dst_shape)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; } @@ -299,11 +299,19 @@ Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult & Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector &src_shape, DataType data_type, Format dst_format, ShapeVector &dst_shape) { - if (!IsDataTypeSupport(data_type) || !CheckShape(src_format, src_shape)) { - GELOGE(PARAM_INVALID, "Trans format from %s to %s, src shape %s, data type %s is not supported", + if (!IsDataTypeSupport(data_type)) { + GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, + "Trans format from %s to %s, src shape %s, data type %s is not supported", TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; + } + if (!CheckShape(src_format, src_shape)) { + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, + "Trans format from %s to %s, src shape %s, data type %s is not supported", + TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str(), + ShapeToString(src_shape).c_str(), TypeUtils::DataTypeToSerialString(data_type).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } ShapeVector hw_shape; return TransShapeToFracNz(src_shape, data_type, dst_shape, hw_shape); @@ -334,7 +342,7 @@ Status FormatTransferFractalNzND::TransShape(Format src_format, const ShapeVecto Format dst_format, ShapeVector &dst_shape) { GELOGD("The shape derivation from %s to %s is not unique. Trans shape is not supported", TypeUtils::FormatToSerialString(src_format).c_str(), TypeUtils::FormatToSerialString(dst_format).c_str()); - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferFractalNz, FORMAT_ND, FORMAT_FRACTAL_NZ) diff --git a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc index dbceb911..45c6d157 100644 --- a/ge/common/formats/format_transfers/format_transfer_fractal_z.cc +++ b/ge/common/formats/format_transfers/format_transfer_fractal_z.cc @@ -42,7 +42,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) { auto c0 = GetCubeSizeByDataType(data_type); if (c0 < 0) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } auto c1 = Ceil(c, c0); @@ -54,15 +54,16 @@ Status TransShapeToFz(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_ dst_shape.push_back(kNiSize); dst_shape.push_back(c0); if (!IsShapeValid(dst_shape)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", + ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; } Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kNchwDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto n = src_shape.at(kNchwN); @@ -74,7 +75,7 @@ Status TransShapeNchwToFz(const std::vector &src_shape, DataType data_t Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto h = src_shape.at(kHwcnH); @@ -87,7 +88,7 @@ Status TransShapeHwcnToFz(const std::vector &src_shape, DataType data_t Status TransShapeNhwcToFz(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto n = src_shape.at(kNhwcN); @@ -369,7 +370,7 @@ Status FormatTransferFractalZ::TransFormat(const TransArgs &args, TransResult &r Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { if (CheckDataTypeSupport(data_type) != SUCCESS) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } if (src_format == FORMAT_NHWC && dst_format == FORMAT_FRACTAL_Z) { @@ -382,7 +383,7 @@ Status FormatTransferFractalZ::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { GELOGD("The shape derivation from FracZ to HWCN is not unique. Trans shape in this direction is not supported"); - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferFracZHwcn, FORMAT_FRACTAL_Z, FORMAT_HWCN) diff --git a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc index 915d0d76..90bf8fcb 100755 --- a/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc +++ b/ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc @@ -160,7 +160,7 @@ Status FormatTransferFracZNchw::TransFormat(const TransArgs &args, TransResult & Status FormatTransferFracZNchw::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { GELOGD("The shape derivation from FracZ to NCHW is not unique. Trans shape in this direction is not supported"); - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferFracZNchw, FORMAT_FRACTAL_Z, FORMAT_NCHW) diff --git a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc index 7840b556..1e29baf2 100755 --- a/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc @@ -43,8 +43,9 @@ Status TransShapeHwcnToC1hwncoc0(const DataType &data_type, const std::vector &dst_shape) { if (src_format == FORMAT_HWCN && CheckDataTypeSupported(data_type)) { if (!CheckShapeValid(src_shape, kHwcnDimsNum)) { - GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", + ShapeToString(src_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return TransShapeHwcnToC1hwncoc0(data_type, src_shape, dst_shape); + } else if (src_format != FORMAT_HWCN) { + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } else { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } } diff --git a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc index a37ba2b5..fd09b34c 100755 --- a/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc +++ b/ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc @@ -157,7 +157,7 @@ Status FormatTransferNc1hwc0Nhwc::TransFormat(const TransArgs &args, TransResult Status FormatTransferNc1hwc0Nhwc::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { GELOGD("The shape derivation from NC1HWC0 to NHWC is not unique. Trans shape in this direction is not supported"); - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferNc1hwc0Nhwc, FORMAT_NC1HWC0, FORMAT_NHWC) diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc index 49b19f46..dd8721c0 100644 --- a/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc @@ -45,7 +45,7 @@ Status CheckDataTypeSupport(DataType data_type) { return GetSizeByDataType(data_ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type, std::vector &dst_shape) { auto c0 = GetCubeSizeByDataType(data_type); if (c0 < 0) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } auto chw = c * h * w; @@ -59,8 +59,9 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type dst_shape.push_back(c0); if (!IsShapeValid(dst_shape)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", + ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; } @@ -68,7 +69,7 @@ Status TransShape(int64_t n, int64_t c, int64_t h, int64_t w, DataType data_type Status TransShapeNchwToFzC04(const std::vector &src_shape, DataType data_type, std::vector &dst_shape) { if (!CheckShapeValid(src_shape, kNchwDimsNum)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } auto n = src_shape.at(kNchwN); @@ -293,13 +294,13 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult Status FormatTransferNchwToFZC04::TransShape(Format src_format, const std::vector &src_shape, DataType data_type, Format dst_format, std::vector &dst_shape) { if (CheckDataTypeSupport(data_type) != SUCCESS) { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } if (src_format == FORMAT_NCHW && dst_format == FORMAT_FRACTAL_Z_C04) { return TransShapeNchwToFzC04(src_shape, data_type, dst_shape); } - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } REGISTER_FORMAT_TRANSFER(FormatTransferNchwToFZC04, FORMAT_NCHW, FORMAT_FRACTAL_Z_C04) diff --git a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc index 98af1efa..752a4d64 100755 --- a/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc @@ -32,12 +32,13 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d std::vector &dst_shape) { int64_t c0 = GetCubeSizeByDataType(data_type); if (c0 <= 0) { - GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } if (!CheckShapeValid(src_shape, kNchwDimsNum)) { - GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", + ShapeToString(src_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } dst_shape.clear(); dst_shape.push_back(src_shape.at(kNchwN)); @@ -46,8 +47,9 @@ Status TransShapeNchwToNc1hwc0(const std::vector &src_shape, DataType d dst_shape.push_back(src_shape.at(kNchwW)); dst_shape.push_back(c0); if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", + ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; } @@ -193,7 +195,7 @@ Status FormatTransferNchwNc1hwc0::TransShape(Format src_format, const std::vecto if (src_format == FORMAT_NCHW) { return TransShapeNchwToNc1hwc0(src_shape, data_type, dst_shape); } else { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } } diff --git a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc index 8faaf4e7..2c6b392d 100755 --- a/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc +++ b/ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc @@ -34,8 +34,8 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d std::vector &dst_shape) { int64_t c0 = GetCubeSizeByDataType(data_type); if (c0 <= 0) { - GELOGE(PARAM_INVALID, "Failed to get cube size, the data type is invalid"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Failed to get cube size, the data type is invalid"); + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } dst_shape.clear(); dst_shape.push_back(src_shape.at(kNhwcN)); @@ -44,8 +44,9 @@ Status TransShapeNhwcToNc1hwc0(const std::vector &src_shape, DataType d dst_shape.push_back(src_shape.at(kNhwcW)); dst_shape.push_back(c0); if (!CheckShapeValid(dst_shape, kNc1hwc0DimsNum)) { - GELOGE(PARAM_INVALID, "Failed to check dst shape %s", ShapeToString(dst_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check dst shape %s", + ShapeToString(dst_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return SUCCESS; } @@ -189,12 +190,15 @@ Status FormatTransferNhwcNc1hwc0::TransShape(Format src_format, const std::vecto DataType data_type, Format dst_format, std::vector &dst_shape) { if (src_format == FORMAT_NHWC && CheckDataTypeSupported(data_type)) { if (!CheckShapeValid(src_shape, kNhwcDimsNum)) { - GELOGE(PARAM_INVALID, "Failed to check src shape %s", ShapeToString(src_shape).c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Failed to check src shape %s", + ShapeToString(src_shape).c_str()); + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } return TransShapeNhwcToNc1hwc0(src_shape, data_type, dst_shape); + } else if (src_format != FORMAT_NHWC) { + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } else { - return UNSUPPORTED; + return ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID; } } diff --git a/ge/common/formats/format_transfers/format_transfer_transpose.cc b/ge/common/formats/format_transfers/format_transfer_transpose.cc index 9be74b1f..de0b456c 100755 --- a/ge/common/formats/format_transfers/format_transfer_transpose.cc +++ b/ge/common/formats/format_transfers/format_transfer_transpose.cc @@ -211,16 +211,16 @@ Status GetPermByForamt(Format src_format, Format dst_format, std::vectorsecond.find(dst_format); if (iter == dst_iter->second.end()) { std::string error = "Failed to trans shape, do not support transpose from format " + FmtToStr(TypeUtils::FormatToSerialString(src_format)) + " to " + FmtToStr(TypeUtils::FormatToSerialString(dst_format)); - GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); - return UNSUPPORTED; + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } perm = iter->second; return SUCCESS; @@ -244,7 +244,7 @@ Status FormatTransferTranspose::TransShape(Format src_format, const std::vector< std::vector perm_arg; GE_CHK_STATUS_RET_NOLOG(GetPermByForamt(src_format, dst_format, perm_arg)); if (!IsShapeArgValid(src_shape, perm_arg)) { - return PARAM_INVALID; + return ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID; } dst_shape = TransShapeByPerm(src_shape, perm_arg); return SUCCESS; diff --git a/ge/common/formats/formats.cc b/ge/common/formats/formats.cc index 0b21a884..2b979e9a 100755 --- a/ge/common/formats/formats.cc +++ b/ge/common/formats/formats.cc @@ -64,8 +64,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Status TransShape(Format src_form std::string error = "Failed to trans data from format " + FmtToStr(TypeUtils::FormatToSerialString(args.src_format)) + " to " + FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)); - GE_ERRORLOG_AND_ERRORMSG(UNSUPPORTED, error.c_str()); - return UNSUPPORTED; + GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, error.c_str()); + return ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID; } return transfer->TransShape(src_format, src_shape, data_type, dst_format, dst_shape); diff --git a/ge/common/ge/plugin_manager.cc b/ge/common/ge/plugin_manager.cc index 75a36d99..38de251e 100644 --- a/ge/common/ge/plugin_manager.cc +++ b/ge/common/ge/plugin_manager.cc @@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec std::vector path_vec; SplitPath(path, path_vec); for (const auto &single_path : path_vec) { - GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID, + GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "The shared library file path is too long!"); continue); // load break when number of loaded so reach maximum @@ -125,7 +125,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec GE_IF_BOOL_EXEC(error == nullptr, error = ""); ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, {"mmDlopen", "shared library path is " + FmtToStr(file_path_dlopen) + ". Errormessage" + FmtToStr(error)}); - GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", + GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, + "Failed to dlopen the shared library path[%s]. Errormessage[%s]!", file_path_dlopen.c_str(), error); continue; } @@ -138,8 +139,8 @@ Status PluginManager::LoadSo(const string &path, const vector &func_chec ErrorManager::GetInstance().ATCReportErrMessage("E19012", {"function", "reason"}, {"mmDlsym", FmtToStr(func_name) + " is skipped since function" + FmtToStr(func_name) + " is not existed!"}); - GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), - func_name.c_str()); + GELOGE(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", + func_name.c_str(), func_name.c_str()); is_valid = false; break; } diff --git a/ge/common/helper/model_helper.cc b/ge/common/helper/model_helper.cc index 05914b22..37cb53bc 100644 --- a/ge/common/helper/model_helper.cc +++ b/ge/common/helper/model_helper.cc @@ -479,8 +479,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadModel(c Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { - GELOGE(status, "Parse model content failed!"); - return status; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); + return ACL_ERROR_GE_PARAM_INVALID; } file_header_ = reinterpret_cast(model_data.model_data); @@ -517,8 +517,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod } if (is_assign_model_) { - GELOGE(GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); - return GE_EXEC_LOAD_MODEL_REPEATED; + GELOGE(ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED, "Model helper has already loaded!"); + return ACL_ERROR_GE_EXEC_LOAD_MODEL_REPEATED; } if (ReleaseLocalModelData() != SUCCESS) { @@ -528,8 +528,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod Status status = ge::DavinciModelParser::ParseModelContent(model_data, model_addr_tmp_, model_len_tmp_); if (status != SUCCESS) { - GELOGE(status, "Parse model content failed!"); - return status; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Parse model content failed!"); + return ACL_ERROR_GE_PARAM_INVALID; } file_header_ = reinterpret_cast(model_data.model_data); @@ -609,7 +609,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { GeModelPtr cur_model = ge::MakeShared(); Status ret = LoadModelData(om_load_helper, cur_model, mode_index); if (ret != SUCCESS) { - return GE_EXEC_LOAD_MODEL_PARTITION_FAILED; + return ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED; } if (is_first_model) { @@ -622,22 +622,22 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) { ret = LoadWeights(om_load_helper, cur_model, mode_index); if (ret != SUCCESS) { - return GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; + return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED; } ret = LoadTBEKernelStore(om_load_helper, cur_model, mode_index); if (ret != SUCCESS) { - return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; } ret = LoadCustAICPUKernelStore(om_load_helper, cur_model, mode_index); if (ret != SUCCESS) { - return GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; + return ACL_ERROR_GE_EXEC_LOAD_KERNEL_PARTITION_FAILED; } ret = LoadTask(om_load_helper, cur_model, mode_index); if (ret != SUCCESS) { - return GE_EXEC_LOAD_TASK_PARTITION_FAILED; + return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; } root_model_->SetSubgraphInstanceNameToModel(cur_model->GetName(), cur_model); } diff --git a/ge/common/model_parser/base.cc b/ge/common/model_parser/base.cc index 64277199..22837be6 100644 --- a/ge/common/model_parser/base.cc +++ b/ge/common/model_parser/base.cc @@ -34,7 +34,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro ge::ModelData &model_data) { std::string real_path = RealPath(model_path); if (real_path.empty()) { - GELOGE(GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); + GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "Model file path '%s' is invalid", model_path); return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; } diff --git a/ge/common/profiling/ge_profiling.cc b/ge/common/profiling/ge_profiling.cc index 43ed6434..9060f82b 100644 --- a/ge/common/profiling/ge_profiling.cc +++ b/ge/common/profiling/ge_profiling.cc @@ -181,7 +181,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le if (type != kProfCommandhandleFinalize) { command.module_index = prof_config_param->profSwitch; } - GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), + GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%lx", iter->second.c_str(), command.module_index); if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); @@ -192,7 +192,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le return ge::FAILED; } - GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index); + GELOGI("Successfully execute profiling command type: %d, command 0x%lx.", type, command.module_index); return ge::SUCCESS; } diff --git a/ge/common/profiling/profiling_manager.cc b/ge/common/profiling/profiling_manager.cc index 32f0ee40..86b1b2c5 100644 --- a/ge/common/profiling/profiling_manager.cc +++ b/ge/common/profiling/profiling_manager.cc @@ -540,7 +540,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi for (auto device_id_module : device_id_module_map_) { if (device_id_module.second != 0) { uint32_t device_id = static_cast(device_id_module.first); - GELOGI("Prof finalize: device_id: %u, module: 0x%llx.", device_id, device_id_module.second); + GELOGI("Prof finalize: device_id: %u, module: 0x%lx.", device_id, device_id_module.second); rt_ret = rtProfilerStop(device_id_module.second, 1, &device_id); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Runtime profiler stop failed."); @@ -629,7 +629,7 @@ Status ProfilingManager::ProfParseParam(const std::map } if (device_num == 0 || device_num > kMaxDeviceNum || device_num != static_cast(device_list.size())) { - GELOGE(FAILED, "Config para device num: %d not equal to device list size: %d.", device_num, device_list.size()); + GELOGE(FAILED, "Config para device num: %d not equal to device list size: %zu.", device_num, device_list.size()); return FAILED; } #endif @@ -659,7 +659,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); + GELOGI("Runtime config param: 0x%lx, device num: %d.", module, device_num); rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { @@ -701,7 +701,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt for (int32_t i = 0; i < device_num; i++) { device_id_ptr[i] = static_cast(device_list[i]); } - GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); + GELOGI("Prof stop: runtime config param: 0x%lx, device num: %d", module, device_num); rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); if (rt_ret != RT_ERROR_NONE) { GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc index 63cff228..af8237e0 100755 --- a/ge/executor/ge_executor.cc +++ b/ge/executor/ge_executor.cc @@ -226,7 +226,7 @@ Status GeExecutor::Initialize() { } GE_CHK_STATUS_RET(OpsKernelBuilderManager::Instance().Initialize({}, false), - "Failed to initialize OpsKernelBuilders"); + "Failed to initialize OpsKernelBuilders."); // Start profiling Options profiling_options; diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index d032965b..fe7ea3bf 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -670,7 +670,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, bool is_offline) { if (!is_offline) { - (void)AttrUtils::SetBool(op_desc, ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, true); + (void)AttrUtils::SetBool(op_desc, ATTR_SINGLE_OP_SCENE, true); } if (CheckForSingleOp(op_desc, inputs, outputs) != SUCCESS) { diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc index b13781f8..2731e076 100644 --- a/ge/graph/build/graph_builder.cc +++ b/ge/graph/build/graph_builder.cc @@ -37,6 +37,8 @@ using domi::BuildMode; namespace { const int32_t kInvalidPerfLevel = -1; +const int64_t kProfilingArStep = 2; +const int64_t kProfilingArStartLogid = 3; enum NodeType { kSubgraphData, kSubgraphNode, kOthers }; } // namespace namespace ge { @@ -457,6 +459,11 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { if (all_reduce_node_index[i] == node_index) { GELOGI("The all reduce node of dynamic graph is %s, idx %u", op_desc->GetName().c_str(), node_index); (void)ge::AttrUtils::SetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, true); + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + int64_t log_id = i * kProfilingArStep + kProfilingArStartLogid; + (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); continue; } } diff --git a/ge/graph/build/memory/binary_block_mem_assigner.cc b/ge/graph/build/memory/binary_block_mem_assigner.cc index fff589f3..97a0aed6 100644 --- a/ge/graph/build/memory/binary_block_mem_assigner.cc +++ b/ge/graph/build/memory/binary_block_mem_assigner.cc @@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector &range_ceils) { GELOGW("Vector all_memory_size is empty!"); return SUCCESS; } - if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { - GELOGE(FAILED, "dividend is 0!"); + if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { + GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); return FAILED; } // Memory size is 512 aligned, so it is not necessary to take less than 512 diff --git a/ge/graph/build/memory/block_mem_assigner.cc b/ge/graph/build/memory/block_mem_assigner.cc index a7564e01..ebd23948 100755 --- a/ge/graph/build/memory/block_mem_assigner.cc +++ b/ge/graph/build/memory/block_mem_assigner.cc @@ -66,10 +66,7 @@ void AlignMemOffset(size_t &mem_align_size) { } static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { - auto left_node_op_desc = left.node->GetOpDesc(); - auto right_node_op_desc = right.node->GetOpDesc(); - if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) - && (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { return true; } return false; @@ -101,14 +98,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { auto left_node_op_desc = left.node->GetOpDesc(); auto right_node_op_desc = right.node->GetOpDesc(); if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { - if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { - if (left.life_time_end >= static_cast(right_node_op_desc->GetId())) { + if (left.GetLifeBegin() < right.GetLifeBegin()) { + if (left.life_time_end >= right.GetLifeBegin()) { return true; } - } else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { + } else if (left.GetLifeBegin() == right.GetLifeBegin()) { return true; } else { - if (right.life_time_end >= static_cast(left_node_op_desc->GetId())) { + if (right.life_time_end >= left.GetLifeBegin()) { return true; } } @@ -326,12 +323,7 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ size_t MemoryBlock::GetLifeBegin() { size_t life_time = 0; if (!node_type_index_list_.empty()) { - if (node_type_index_list_.front().node != nullptr) { - auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); - if (node_op_desc != nullptr) { - life_time = node_op_desc->GetId(); - } - } + life_time = node_type_index_list_.front().GetLifeBegin(); } return life_time; } @@ -418,7 +410,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ depend_stream_life_[stream_id_] = GetLifeBegin(); } -size_t MemoryBlock::GetLifeEnd() { +size_t MemoryBlock::GetLifeEnd() const { if (!node_type_index_list_.empty()) { return node_type_index_list_.back().life_time_end; } @@ -592,32 +584,29 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector &all_memory_size) { for (auto &out_anchor : n->GetAllOutDataAnchors()) { GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); - bool reuse_input = false; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, - GELOGI("Get reuse_input failed")); - - if (!reuse_input) { - int64_t size = 0; - GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); - batch_all_memory_size[batch_label].emplace_back(size); - if (batch_total_size.find(batch_label) == batch_total_size.end()) { - batch_total_size[batch_label] = size; - } else { - batch_total_size[batch_label] += size; - } + int64_t size = 0; + GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); + GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", + node_op_desc->GetName().c_str(), size); + return;); + batch_all_memory_size[batch_label].emplace_back(size); + if (batch_total_size.find(batch_label) == batch_total_size.end()) { + batch_total_size[batch_label] = size; + } else { + batch_total_size[batch_label] += size; + } - if (!anchor_to_symbol_.empty()) { - auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); - if (iter1 == anchor_to_symbol_.end()) { - continue; - } - const std::string &symbol = iter1->second; - auto iter2 = symbol_size_.find(symbol); - if (iter2 == symbol_size_.end()) { - symbol_size_[symbol] = size; - } else if (size > static_cast(iter2->second)) { - iter2->second = size; - } + if (!anchor_to_symbol_.empty()) { + auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); + if (iter1 == anchor_to_symbol_.end()) { + continue; + } + const std::string &symbol = iter1->second; + auto iter2 = symbol_size_.find(symbol); + if (iter2 == symbol_size_.end()) { + symbol_size_[symbol] = size; + } else if (size > static_cast(iter2->second)) { + iter2->second = size; } } } @@ -658,35 +647,17 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { return false; } -void AddReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - it->second++; - } else { - reusable_block_counts[key] = 1; - } -} - -void ReduceReusableBlockCount(const MemoryBlock &mem_block, map &reusable_block_counts) { - string key = std::to_string(mem_block.Size()); - key += "_" + std::to_string(mem_block.stream_id_); - key += "_" + std::to_string(mem_block.memory_type_); - auto it = reusable_block_counts.find(key); - if (it != reusable_block_counts.end()) { - if (it->second > 0) { - it->second--; - } - } -} - -bool CanReuseBySize(const map &reusable_block_counts, const MemoryBlock &reusable_block, - size_t block_size, size_t real_size, bool continuous) { +bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { bool can_reuse = false; if (reusable_block.Size() == block_size) { - can_reuse = true; + // in some continuous input case, continuous first input node's is not same as topo first node. + if (continuous_life_begin > 0) { + if (continuous_life_begin > reusable_block.GetLifeEnd()) { + can_reuse = true; + } + } else { + can_reuse = true; + } } return can_reuse; } @@ -697,6 +668,13 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { return false; } + auto node_desc = n->GetOpDesc(); + GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); + return false;); + std::vector offsets_for_fusion = {}; + bool has_lx_fusion_attr = + AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { auto out_anchor = n->GetOutDataAnchor(out_index); GE_IF_BOOL_EXEC(out_anchor == nullptr, @@ -719,16 +697,17 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false;); // If GetBool fail, is_input_continuous is false. - bool is_input_continuous_no_padding = false; - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, - is_input_continuous_no_padding); - if (is_input_continuous_no_padding) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (is_input_continuous) { reset_zero_copy_flag = true; - return false; + has_lx_fusion_attr = true; + } else { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); } - (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); - GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), + // lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later + GE_IF_BOOL_EXEC(is_input_continuous && + (CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); no_need_assign_memory = true; return false;); @@ -742,6 +721,10 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou // Only set attr one times. if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + // lx fusion case assign max size for first block, so reuse as none continuous + GE_IF_BOOL_EXEC(has_lx_fusion_attr, + is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); + return false;); node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); } peer_input_index = peer_in_anchor->GetIdx(); @@ -754,6 +737,95 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou return false; } +bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { + if (n == nullptr) { + return false; + } + + int64_t max_node_life_time = 0; + int64_t continuous_input_node_life_time = 0; + if (static_cast(out_index) < n->GetAllOutDataAnchors().size()) { + auto out_anchor = n->GetOutDataAnchor(out_index); + if(out_anchor == nullptr) { + return false; + } + + // continuous input node's life time should be max + for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { + if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ + return false; + } + auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); + GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, + GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); + return false;); + + if(peer_in_node_desc->GetId() > max_node_life_time) { + max_node_life_time = peer_in_node_desc->GetId(); + } + + // If GetBool fail, is_input_continuous is false. + bool is_input_continuous = false; + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); + if (!is_input_continuous) { + (void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + } + if (is_input_continuous) { + continuous_input_node_life_time = peer_in_node_desc->GetId(); + } + } + } + return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; +} + +/// +/// @ingroup GE +/// @brief Check continuous memory reuseable +/// @return void +/// +bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { + // n,peer_node_desc have been checked + auto node_desc = n->GetOpDesc(); + auto peer_node_desc = peer_node->GetOpDesc(); + continuous_life_begin_ = static_cast(node_desc->GetId()); + // lx fusion case check all continuous input node, firt input node's life time should be min + for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { + if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || + (in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { + GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); + return false; + } + auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); + /// + /// node2 node1 node3 + /// | / / | + /// node5 node6 + /// firt input node's life time is not min + /// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse + /// + if (static_cast(peer_out_node_desc->GetId()) < continuous_life_begin_) { + continuous_life_begin_ = static_cast(peer_out_node_desc->GetId()); + GELOGI( + "Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," + "min is node[%s] life[%zu]", + n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); + } + // when node3's output node5's life time is not max(node6 > node5), not reuse + if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), + in_anchor->GetPeerOutAnchor()->GetIdx())) { + GELOGI( + "Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " + "max life node", + n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), + peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); + return false; + } + } + return true; +} + /// /// @ingroup GE /// @brief Check pre_reuse flag & post_reuse glag for each symbol @@ -1039,8 +1111,9 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); // A node can reuse blocks of the same stream and preorder streams - if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { - reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); + if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { + reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, + real_size, no_align_size); if (mem_type == kOutput) { auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); if (iter != anchor_to_symbol_.end()) { @@ -1049,7 +1122,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, } reusable_block->continuous_block_ = continuous; reusable_block->ref_count_++; - ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); reusable_blocks_[memory_type][stream_id].erase((++it).base()); return reusable_block; } @@ -1062,8 +1134,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, // Data and netoutput need zero copy block block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); - - block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); + block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); block->stream_id_ = node_op_desc->GetStreamId(); block->ref_count_++; block->continuous_block_ = continuous; @@ -1220,8 +1291,23 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, std::string symbol; if (IsSymbolExist(node_index_io, symbol)) { block = symbol_blocks_[symbol]; - block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); + GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); + return nullptr); + // reduce old size + size_t align_size = block->Size(); + AlignMemOffset(align_size); + theory_memory_size_ -= align_size; + + auto block_size = GetBlockSize(size, ranges); + block->SetSize(block_size); + block->SetLifeTimeEnd(life_time_); + block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); block->ref_count_++; + + // add new size + align_size = block_size; + AlignMemOffset(align_size); + theory_memory_size_ += align_size; } else { // if ref input is variable, can not find symbol, must judge alone if (IsOutputIndexRef(node_op_desc, index)) { @@ -1281,7 +1367,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, GELOGI("Get dst_reuse_input_index failed")); if (dst_reuse_input && (dst_reuse_input_index == static_cast(in_anchor->GetIdx()))) { - block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); out_count_reuse_input += 1; reuse_input = true; } @@ -1322,7 +1407,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i if (static_cast(index) == output_index) { if (node->GetOwnerComputeGraph() != nullptr) { string graph_name = node->GetOwnerComputeGraph()->GetName(); - GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), + GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), op_desc->GetName().c_str(), index, op_desc->GetStreamId()); } return true; @@ -1360,7 +1445,6 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vectorsame_stream_) { to_release->SetLifeTimeEnd(life_time_); reusable_memory.emplace_back(to_release); - AddReusableBlockCount(*to_release, reusable_block_counts_); } } } @@ -1460,6 +1544,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector } is_op_reuse_mem_ = true; + continuous_life_begin_ = 0; if (op_reuse_env_valid_ == true) { vector::iterator it_name = std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); @@ -1516,7 +1601,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector continue; } // atomic can't be reused - bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; + bool need_change = is_op_reuse_mem_ && is_atomic; if (need_change) { is_op_reuse_mem_ = false; } @@ -1909,11 +1994,12 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, } op_desc->SetWorkspace(workspace_list); } - GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " - "life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), - op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), - block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, - block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, + GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " + "noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", + graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), + node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, + block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, + block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, block->batch_label_.c_str()); } diff --git a/ge/graph/build/memory/block_mem_assigner.h b/ge/graph/build/memory/block_mem_assigner.h index e1db6cad..4401108d 100755 --- a/ge/graph/build/memory/block_mem_assigner.h +++ b/ge/graph/build/memory/block_mem_assigner.h @@ -39,14 +39,15 @@ using DependStreamLife = std::map>; enum OpMemoryType { kOutput, kWorkspace }; struct NodeTypeIndex { - NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) - : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} + NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) + : node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} ge::NodePtr node = nullptr; OpMemoryType mem_type = kOutput; uint32_t index = 0; - size_t life_time_end = kMaxLifeTime; bool ref_input = false; + size_t life_time_begin = 0; + size_t life_time_end = kMaxLifeTime; const string GetMemType() const { if (mem_type == kOutput) { return "output"; @@ -55,6 +56,34 @@ struct NodeTypeIndex { } return "unknown"; } + + size_t GetLifeBegin() const { + if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { + return 0; + } + + if ((life_time_begin > 0) && (life_time_begin < static_cast(node->GetOpDesc()->GetId()))) { + return life_time_begin; + } else { + return node->GetOpDesc()->GetId(); + } + } + + std::string GetLifeBeginDesc() const { + if (node == nullptr) { + return ""; + } + auto node_op_desc = node->GetOpDesc(); + if (node_op_desc != nullptr) { + auto life_begin = GetLifeBegin(); + if (life_begin != static_cast(node_op_desc->GetId())) { + return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); + } else { + return std::to_string(node_op_desc->GetId()); + } + } + return ""; + } }; class MemoryBlock { @@ -86,16 +115,13 @@ class MemoryBlock { symbol_list_.clear(); } - void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, - int64_t stream_id) { - real_size_list_.emplace_back(real_size); - no_align_size_list_.emplace_back(no_align_size); - node_type_index_list_.emplace_back(node, type, out_index, false); - if (stream_id != stream_id_) { - same_stream_ = false; + size_t Size() const { return block_size_; } + + void SetSize(size_t size) { + if (size > block_size_) { + block_size_ = size; } } - size_t Size() const { return block_size_; } size_t AlignSize() const; @@ -143,7 +169,7 @@ class MemoryBlock { size_t GetLifeBegin(); - size_t GetLifeEnd(); + size_t GetLifeEnd() const; void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); @@ -406,6 +432,7 @@ class BlockMemAssigner : public MemAssigner { bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); + bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); /// /// @ingroup GE /// @|+++++++++block1++++++++| |+++++++++block1++++++++| @@ -429,8 +456,6 @@ class BlockMemAssigner : public MemAssigner { std::unordered_map>> reusable_blocks_; - std::map reusable_block_counts_; - std::unordered_map>> stream_workspace_blocks_; std::unordered_map> node_out_blocks_; @@ -460,6 +485,7 @@ class BlockMemAssigner : public MemAssigner { std::string max_batch_label_; + size_t continuous_life_begin_ = 0; /// /// @ [stream1][nodeid] /// @[nodeid] [stream2][nodeid] diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index fe4c59d2..a868fdcd 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -35,10 +35,9 @@ namespace { const int kAllInputAddrIsAtomic = -1; const int kVirtualInputNodeMemoryReuse = 0; const int kVirtualOutputNodeMemoryReuse = 1; -const size_t kVirtualInputNodeOutputSize = 1; -const size_t kVirtualOutputNodeInputSize = 1; -const size_t kVirtualNodeDataIndex = 0; -const char *const kMbatchNodeNameFlag = "_ascend_mbatch_batch_"; +// One state per bit cannot be repeated +enum ContinuousType { kTypeInput = 1, kTypeInputNoPadding = 2, kTypeOutput = 4, kTypeOutputNoPadding = 8 }; + int64_t GetSymbolOutputOffset(const std::map &anchor_to_symbol, const std::map> &symbol_to_anchors, const ge::NodePtr &node, const uint32_t i) { @@ -136,7 +135,7 @@ ge::Status GraphMemoryAssigner::AssignVarAttr2Nodes() { return ge::SUCCESS; } -ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, +ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size) { graphStatus graph_status = ge::TensorUtils::GetSize(*output_desc, out_size); @@ -181,68 +180,6 @@ ge::Status GraphMemoryAssigner::CalculateTensorRealSizeAndOutSize(const ge::Cons return SUCCESS; } -Status GraphMemoryAssigner::GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label) { - for (auto &i_map : mem_reuse_virtual_nodes_map) { - vector virtual_nodes_list = i_map.second; - vector max_shape_dims; - size_t max_batch_dim = 0; - bool max_batch_dim_find = false; - for (size_t i = 0; i < virtual_nodes_list.size(); ++i) { - GE_CHECK_NOTNULL(virtual_nodes_list[i]); - OpDescPtr op_desc = virtual_nodes_list[i]->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - - ge::ConstGeTensorDescPtr input_output_desc; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - input_output_desc = op_desc->GetOutputDescPtr(kVirtualNodeDataIndex); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - input_output_desc = op_desc->GetInputDescPtr(kVirtualNodeDataIndex); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHECK_NOTNULL(input_output_desc); - - if (i == 0) { - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - max_shape_dims = input_output_desc->GetShape().GetDims(); - } else { - vector current_shape_dims = input_output_desc->GetShape().GetDims(); - if (current_shape_dims.size() != max_shape_dims.size()) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - for (size_t j = 0; j < current_shape_dims.size(); ++j) { - if (current_shape_dims[j] == max_shape_dims[j]) { - continue; - } - if (max_batch_dim_find && max_batch_dim != j) { - std::string error = "The shape of several nodes between multiple batches does not match."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - max_batch_dim_find = true; - max_batch_dim = j; - if (current_shape_dims[j] > max_shape_dims[j]) { - max_shape_dims[j] = current_shape_dims[j]; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, max_batch_label); - } - // Only compare the first different dim in shape. - break; - } - } - } - // In every element of virtual_input_nodes_map, the label of the max batch node is the same. - break; - } - return SUCCESS; -} - Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, map &mem_type_to_offset) { if (memory_offset_.empty()) { GELOGE(FAILED, "memory_offset_ is empty."); @@ -250,13 +187,6 @@ Status GraphMemoryAssigner::ReAssignMemory(bool is_loop_graph, mapGetGraphMemoryMaxSize())}); + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), + iter.second, iter.first); } return ge::FAILED; } @@ -313,22 +245,137 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map &mem_offse return SUCCESS; } +uint32_t GetContinuousMemoryType(const OpDescPtr &op_desc) { + if (op_desc == nullptr) { + return 0; + }; + + bool is_continuous = false; + uint32_t continuous_type = 0; + // If GetBool fail, is_continuous is false. + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_continuous); + if (is_continuous) { + continuous_type |= kTypeInput; + } else { + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_continuous); + if (is_continuous) { + bool attr_reuse = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kTypeInputNoPadding; + } + } + } + + is_continuous = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + continuous_type |= kTypeOutput; + } else { + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, is_continuous); + if (is_continuous) { + bool attr_reuse = false; + (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); + if (attr_reuse) { + continuous_type |= kTypeOutputNoPadding; + } + } + } + + if (continuous_type != 0) { + GELOGI("Current node %s continuous type %d.", op_desc->GetName().c_str(), continuous_type); + } + return continuous_type; +} + +Status GetMemorySize(const OpDescPtr &op_desc, const ge::ConstGeTensorDescPtr &output_desc, uint32_t continuous_type, + int64_t &tensor_size, int64_t &nopadding_size) { + if ((op_desc == nullptr) || (output_desc == nullptr)) { + GELOGE(FAILED, "Input para is nullptr."); + return FAILED; + } + tensor_size = 0; + nopadding_size = 0; + bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); + if (is_nopadding) { + int64_t attr_dim_index; + bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); + if (!get_attr_dim_flag) { + GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); + return FAILED; + } + + // Calculate tensor real size of each piece of data and out size of complete data + int64_t batch_dim_num = 1; + if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, nopadding_size, batch_dim_num, tensor_size) != + SUCCESS) { + GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s.", op_desc->GetName().c_str()); + return FAILED; + } + } else { + if (ge::TensorUtils::GetSize(*output_desc, tensor_size) != ge::SUCCESS) { + GELOGE(FAILED, "GetSize failed."); + return FAILED; + } + } + if ((tensor_size < 0) || (nopadding_size < 0)) { + GELOGE(FAILED, "GetMemorySize for node %s failed.", op_desc->GetName().c_str()); + return FAILED; + } + return SUCCESS; +} + +void AlignMemOffset(int64_t &mem_align_size) { + if (mem_align_size <= 0) { + return; + } + mem_align_size = (mem_align_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; +} + +bool IsContinuousInputConflict(const ge::NodePtr &node, const OpDescPtr &peer_op_desc) { + bool is_peer_output_continuous = false; + // If GetBool fail, is_peer_output_continuous is false. + (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); + + // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and + // continuous output of the previous node is the same, we can support it. If size != 1, there may be + // conflict between the two, we can not support it. + auto peer_output_size = peer_op_desc->GetOutputsSize(); + GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + + bool is_peer_reference = false; + // If GetBool fail, is_peer_reference is false. + (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); + GE_IF_BOOL_EXEC(is_peer_reference, + std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + + " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + + " requires continuous output. There may be conflict between the two." + + "This node is not supported now."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + return true;); + return false; +} + Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { Status ret; for (auto &node : compute_graph_->GetAllNodes()) { - // Get the continuous input type of the node, default is false - bool is_input_continuous = false; - GE_CHECK_NOTNULL(node->GetOpDesc()); - // If GetBool fail, is_input_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); + GE_CHECK_NOTNULL(node); + auto continuous_type = GetContinuousMemoryType(node->GetOpDesc()); // Assign continuous input memory - if (is_input_continuous) { - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); + bool continuous_input = ((continuous_type & kTypeInput) != 0) || ((continuous_type & kTypeInputNoPadding) != 0); + int64_t memory_type = RT_MEMORY_HBM; + if (continuous_input) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; - ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type); + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "input"), "Get node memory type failed."); + ret = AssignContinuousInputMemory(node, mem_clean_start, mem_clean_size, memory_type, continuous_type); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; @@ -338,7 +385,6 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { vector input_indexes; // If GetListInt fail, input_indexes is empty. (void) ge::AttrUtils::GetListInt(node->GetOpDesc(), ATOMIC_ATTR_INPUT_INDEX, input_indexes); - if (!input_indexes.empty() && input_indexes[0] == kAllInputAddrIsAtomic) { // check whether there is an atomic conflict between the current node and the peer out node if (!CheckInputIsSupportAtomic(node)) { @@ -350,9 +396,10 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { const auto &in_control_anchor = node->GetInControlAnchor(); GE_CHECK_NOTNULL(in_control_anchor); for (const auto &peer_out_control_anchor : in_control_anchor->GetPeerOutControlAnchors()) { + GE_CHECK_NOTNULL(peer_out_control_anchor); auto peer_out_node = peer_out_control_anchor->GetOwnerNode(); if (peer_out_node->GetType() == ATOMICADDRCLEAN) { - ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}); + ret = SetAtomicCleanAttr(peer_out_node, {mem_clean_start}, {mem_clean_size}, memory_type); if (ret != SUCCESS) { GELOGE(ret, "Failed to set attr for atomic addr clean node %s.", peer_out_node->GetName().c_str()); return ret; @@ -362,23 +409,13 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } } - // Get the reference type of the node, default is false - bool is_ref = false; - // If GetBool fail, is_ref is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - - // Get the continuous output type of the node, default is false - bool is_output_continuous = false; - // If GetBool fail, is_output_continuous is false. - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_OUTPUT, is_output_continuous); - - // If the output is ref type and refers to the ref of an input, the name of the output - // and the input are the same. Ge encounters ref type, finds matching relationship according - // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast - if (!is_ref && is_output_continuous) { // Assign continuous output memory - ret = AssignContinuousOutputMemory(node); + // Assign continuous output memory + bool continuous_output = ((continuous_type & kTypeOutput) != 0) || ((continuous_type & kTypeOutputNoPadding) != 0); + if (continuous_output) { + GE_CHK_STATUS_RET(GetNodeMemoryType(node, memory_type, "output"), "Get node memory type failed."); + ret = AssignContinuousOutputMemory(node, memory_type, continuous_type); if (ret != ge::SUCCESS) { - GELOGE(ret, "Assign reference memory failed!"); + GELOGE(ret, "Assign continuous output memory failed!"); return ret; } } @@ -391,520 +428,181 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type) { + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); - bool continuous_input_alloc = false; - (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, continuous_input_alloc); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + // The head and tail of hcom continuous input should be added 512 + iter->second.mem_offset_ += MEM_ALIGN_SIZE; continuous_mem_start = iter->second.mem_offset_; + int64_t mem_offset = iter->second.mem_offset_; + int64_t extra_memory_size = 0; + bool is_continuous_input_allocated = false; + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, is_continuous_input_allocated); for (auto &in_data_anchor : node->GetAllInDataAnchors()) { + GE_IF_BOOL_EXEC(in_data_anchor == nullptr, continue); auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); GE_IF_BOOL_EXEC(peer_op_desc == nullptr, continue); - bool is_peer_output_continuous = false; - // If GetBool fail, is_peer_output_continuous is false. - (void) ge::AttrUtils::GetBool(peer_op_desc, ATTR_NAME_CONTINUOUS_OUTPUT, is_peer_output_continuous); - - // Get peer node output size, if size == 1(peer node has only one output), continuous input of the node and - // continuous output of the previous node is the same, we can support it. If size != 1, there may be - // conflict between the two, we can not support it. - auto peer_output_size = peer_op_desc->GetOutputsSize(); - GE_IF_BOOL_EXEC(is_peer_output_continuous && (peer_output_size != 1), - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - bool is_peer_reference = false; - // If GetBool fail, is_peer_reference is false. - (void) AttrUtils::GetBool(peer_op_desc, ATTR_NAME_REFERENCE, is_peer_reference); - GE_IF_BOOL_EXEC(is_peer_reference, - std::string error = "Current op" + FmtToStr(node->GetOpDesc()->GetName()) + - " requires continuous input, while the previous op" + FmtToStr(peer_op_desc->GetName()) + - " requires continuous output. There may be conflict between the two." + - "This node is not supported now."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return PARAM_INVALID;); - - vector output_list = peer_op_desc->GetOutputOffset(); - std::vector offsets_for_fusion = {}; - bool has_offset_attr = - AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); - if (peer_out_data_anchor->GetIdx() < static_cast(output_list.size())) { - if (continuous_input_alloc && !has_offset_attr) { - if (in_data_anchor->GetIdx() == 0) { - continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); - } - // can not use else if, incase only one input - if (in_data_anchor->GetIdx() == static_cast(node->GetAllInDataAnchors().size()) - 1) { - int64_t tensor_desc_size = 0; - Status ret = ge::TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), - tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - tensor_desc_size = (tensor_desc_size + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - continuous_mem_size = - output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; - } - GELOGI( - "[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " - "real_size[%u].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), - 0, 0); - continue; - } - - output_list.at(peer_out_data_anchor->GetIdx()) = iter->second.mem_offset_; - } else { - std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - GELOGE(FAILED, "index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return FAILED; - } - peer_op_desc->SetOutputOffset(output_list); - size_t pre_mem_offset = iter->second.mem_offset_; + GE_IF_BOOL_EXEC(IsContinuousInputConflict(node, peer_op_desc), return PARAM_INVALID;); int64_t tensor_desc_size = 0; - if (has_offset_attr) { - if (peer_out_data_anchor->GetIdx() < static_cast(offsets_for_fusion.size())) { - auto offset_for_fusion = offsets_for_fusion[peer_out_data_anchor->GetIdx()]; - iter->second.mem_offset_ += offset_for_fusion; - } else { + int64_t nopadding_size = 0; + int64_t real_size = 0; + std::vector offsets_of_fusion = {}; + bool lx_fusion = AttrUtils::GetListInt(peer_op_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_of_fusion); + lx_fusion = lx_fusion && !offsets_of_fusion.empty(); + if (lx_fusion) { + if (peer_out_data_anchor->GetIdx() >= static_cast(offsets_of_fusion.size())) { std::string error = "fusion: peer node" + FmtToStr(peer_op_desc->GetName()) + " index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } + nopadding_size = offsets_of_fusion[peer_out_data_anchor->GetIdx()]; + tensor_desc_size = nopadding_size; } else { - Status ret = - TensorUtils::GetSize(*(peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx())), tensor_desc_size); - GE_IF_BOOL_EXEC(ret != ge::SUCCESS, GELOGE(FAILED, "GetSize failed."); return FAILED;); - - iter->second.mem_offset_ += tensor_desc_size; - } - - // If set tensor_actual_size, Memory alignment is not required. - int32_t is_tensor_actual_size = 0; - ge::AttrUtils::GetInt(peer_op_desc, ATTR_NAME_GET_TENSOR_ACTUAL_SIZE, is_tensor_actual_size); - if (is_tensor_actual_size == 0) { - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); + if (GetMemorySize(node->GetOpDesc(), peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()), + continuous_type, tensor_desc_size, nopadding_size) != ge::SUCCESS) { + return FAILED; + } } - GELOGI( - "[IMAS]Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " - "real_size[%ld].", node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), - (iter->second.mem_offset_ - pre_mem_offset), tensor_desc_size); - } - - iter->second.mem_offset_ += MEM_ALIGN_SIZE; - if (!continuous_input_alloc) { - continuous_mem_size = iter->second.mem_offset_ - continuous_mem_start; - } - return SUCCESS; -} -Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node) { - GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); - auto out_op_desc = node->GetOpDesc(); - GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); - vector output_list = out_op_desc->GetOutputOffset(); - - if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { - GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", - out_op_desc->GetOutputsSize(), output_list.size()); - return ge::FAILED; - } - - size_t mem_offset = output_list[0]; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset; - int64_t tensor_desc_size = 0; - if (ge::TensorUtils::GetSize(*(out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx())), tensor_desc_size) != - ge::SUCCESS) { - GELOGE(FAILED, "GetSize failed."); - return FAILED; - } - mem_offset += tensor_desc_size; - if (mem_offset <= 0) { + bool is_nopadding = ((continuous_type & kTypeInputNoPadding) != 0) || lx_fusion; + vector output_list = peer_op_desc->GetOutputOffset(); + if (peer_out_data_anchor->GetIdx() >= static_cast(output_list.size())) { + std::string error = "index" + FmtToStr(peer_out_data_anchor->GetIdx()) + " is out of range."; + GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); return FAILED; } - mem_offset = (mem_offset + MEM_ALIGN_SIZE - 1) / MEM_ALIGN_SIZE * MEM_ALIGN_SIZE; - GELOGI( - "[IMAS]Continuous output : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), tensor_desc_size, tensor_desc_size); - } - out_op_desc->SetOutputOffset(output_list); - return ge::SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty node name:%s", node->GetName().c_str()); - return FAILED; - } - output_list.at(0) = mem_offset_reuse; - op_desc->SetOutputOffset(output_list); - GELOGI("Set virtual input node %s output offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); - - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; - } - size_t extra_memory_size = 0; - for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { - auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); - auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); - vector output_offsets = peer_op_desc->GetOutputOffset(); - if (peer_out_data_anchor->GetIdx() >= static_cast(output_offsets.size())) { - GELOGE(ge::FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); - return ge::FAILED; + // when continuous input has been allocated first input is beginning offset + bool is_allocated_first_input = is_continuous_input_allocated && (in_data_anchor->GetIdx() == 0); + if (is_allocated_first_input) { + mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); + continuous_mem_start = output_list.at(peer_out_data_anchor->GetIdx()); + } else { + // set offset for input + output_list.at(peer_out_data_anchor->GetIdx()) = mem_offset; + peer_op_desc->SetOutputOffset(output_list); } - output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(output_offsets); - size_t pre_mem_offset = mem_offset_reuse; - // Calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = peer_op_desc->GetOutputDescPtr(peer_out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - peer_op_desc->GetName().c_str(), peer_out_data_anchor->GetIdx()); - return FAILED; + int64_t align_size = tensor_desc_size; + if (is_nopadding) { + mem_offset += nopadding_size; + extra_memory_size += (tensor_desc_size - nopadding_size); + real_size = nopadding_size; + } else { + ge::AlignMemOffset(align_size); + mem_offset += align_size; + // The head and tail of hcom continuous input should be added 512 + extra_memory_size = MEM_ALIGN_SIZE; + real_size = tensor_desc_size; } - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - - GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), - peer_out_data_anchor->GetIdx(), pre_mem_offset, peer_op_desc->GetStreamId(), out_size, - output_mem_size); + GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " + "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), + node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), + output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, + is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); } - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual input node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousInputMemory() { - map> mem_reuse_virtual_input_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - if (attr_reuse && attr_continuous) { - if (op_desc->GetOutputsSize() != kVirtualInputNodeOutputSize) { - // When current virtual node has several outputs, can't directly determine which input is the tensor for reuse. - std::string error = "Only one output is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(op_desc->GetOutputsSize()) + " outputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "input"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(memory_type); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual input node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualInputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual input node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. - string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - GELOGE(FAILED, "Cannot find key string [%s] of multi-batch in name of virtual input node, node name: %s.", - kMbatchNodeNameFlag, n->GetName().c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_input_nodes; - if (mem_reuse_virtual_input_nodes_map.count(fixed_name) != 0) { - parallel_virtual_input_nodes = mem_reuse_virtual_input_nodes_map[fixed_name]; - } - parallel_virtual_input_nodes.emplace_back(n); - mem_reuse_virtual_input_nodes_map[fixed_name] = parallel_virtual_input_nodes; - } - } - } - - int32_t mem_reuse_model = 0; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_input_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual input nodes failed."); - return FAILED; + mem_offset += extra_memory_size; + ge::AlignMemOffset(mem_offset); + continuous_mem_size = mem_offset - continuous_mem_start; + if (is_continuous_input_allocated) { + // not allocate memory here, so no need add 512 in header + iter->second.mem_offset_ -= MEM_ALIGN_SIZE; + } else { + iter->second.mem_offset_ = mem_offset; } return SUCCESS; } -Status GraphMemoryAssigner::ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse) { - OpDescPtr op_desc = node->GetOpDesc(); - - // 1. set memory of to be reused input tensor +Status GetFirstInputPeerOutOutputOffset(const ge::NodePtr &node, int64_t &mem_offset) { auto in_data_anchor_list = node->GetAllInDataAnchors(); + if (in_data_anchor_list.empty()) { + GELOGE(FAILED, "Node %s's in data anchor is empty.", node->GetName().c_str()); + return FAILED; + } auto peer_out_data_anchor = in_data_anchor_list.at(0)->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_data_anchor); + GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, GELOGE(ge::FAILED, "peer_out_data_anchor is null."); + return ge::FAILED); auto peer_op_desc = peer_out_data_anchor->GetOwnerNode()->GetOpDesc(); - GE_CHECK_NOTNULL(peer_op_desc); + GE_IF_BOOL_EXEC(peer_op_desc == nullptr, GELOGE(ge::FAILED, "peer_op_desc is null."); return ge::FAILED); vector in_node_output_offsets = peer_op_desc->GetOutputOffset(); if (peer_out_data_anchor->GetIdx() >= static_cast(in_node_output_offsets.size())) { GELOGE(FAILED, "Index : %d is out of range.", peer_out_data_anchor->GetIdx()); return FAILED; } - in_node_output_offsets.at(peer_out_data_anchor->GetIdx()) = mem_offset_reuse; - peer_op_desc->SetOutputOffset(in_node_output_offsets); - GELOGI("Set virtual output node %s input data offset to %zu.", op_desc->GetName().c_str(), mem_offset_reuse); + mem_offset = in_node_output_offsets.at(peer_out_data_anchor->GetIdx()); + return SUCCESS; +} - // 2. set memory of output tensor - vector output_list = op_desc->GetOutputOffset(); - if (output_list.empty()) { - GELOGE(FAILED, "Outputoffset is empty, node name: %s", node->GetName().c_str()); - return FAILED; - } - if (op_desc->GetOutputsSize() > output_list.size()) { - GELOGE(FAILED, "The size %zu of op_desc is more than output_list's size %zu.", op_desc->GetOutputsSize(), - output_list.size()); - return FAILED; - } - int64_t attr_dim_index; - bool get_attr_dim_flag = ge::AttrUtils::GetInt(op_desc, ATTR_NAME_REUSE_INPUT_ON_DIM_INDEX, attr_dim_index); - if (!get_attr_dim_flag) { - GELOGE(FAILED, "Get attr _reuse_input_on_dim_index failed."); - return FAILED; +Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, + uint32_t continuous_type) { + GELOGI("Current node %s needs continuous output.", node->GetName().c_str()); + auto out_op_desc = node->GetOpDesc(); + GE_IF_BOOL_EXEC(out_op_desc == nullptr, GELOGE(ge::FAILED, "out_op_desc is null."); return ge::FAILED); + vector output_list = out_op_desc->GetOutputOffset(); + if ((out_op_desc->GetOutputsSize() > output_list.size()) || (output_list.size() == 0)) { + GELOGE(ge::FAILED, "The size %zu of node output desc is more than output_list's size %zu.", + out_op_desc->GetOutputsSize(), output_list.size()); + return ge::FAILED; } - size_t extra_memory_size = 0; - for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { - output_list[out_data_anchor->GetIdx()] = mem_offset_reuse; - size_t pre_mem_offset = mem_offset_reuse; - - // calculate tensor real size of each piece of data and out size of complete data - ge::ConstGeTensorDescPtr output_desc = op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()); - GE_CHECK_NOTNULL(output_desc); - int64_t output_mem_size; - int64_t batch_dim_num = 1; - int64_t out_size; - if (CalculateTensorRealSizeAndOutSize(output_desc, attr_dim_index, output_mem_size, batch_dim_num, out_size) != - SUCCESS) { - GELOGE(FAILED, "CalculateTensorRealSizeAndOutSize failed for node %s output [%d].", - op_desc->GetName().c_str(), out_data_anchor->GetIdx()); - return FAILED; + int64_t mem_offset = 0; + bool is_nopadding = ((continuous_type & kTypeOutputNoPadding) != 0); + if (is_nopadding) { + // out tensor memory must be reused input tensor memory + if (GetFirstInputPeerOutOutputOffset(node, mem_offset) != SUCCESS) { + return ge::FAILED; } + } else { + // Get the reference type of the node, default is false + bool is_ref = false; + // If GetBool fail, is_ref is false. + (void) ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_REFERENCE, is_ref); - mem_offset_reuse += output_mem_size; - extra_memory_size = extra_memory_size + out_size - output_mem_size; - - GELOGI("[IMAS]Virtual node optimize: set %s name[%s] output[%d] offset to [%zu], size[%ld], real_size[%ld].", - node->GetOwnerComputeGraph()->GetName().c_str(), op_desc->GetName().c_str(), out_data_anchor->GetIdx(), - pre_mem_offset, out_size, output_mem_size); - } - op_desc->SetOutputOffset(output_list); - mem_offset_reuse += extra_memory_size; - size_t after_mem_offset = mem_offset_reuse; - GELOGI("After reassign virtual output node[name: %s, type: %s] memory, memory offset = %zu.", - op_desc->GetName().c_str(), op_desc->GetType().c_str(), after_mem_offset); - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignReuseAndNoPaddingContinuousOutputMemory() { - map> mem_reuse_virtual_output_nodes_map; - int64_t memory_type = RT_MEMORY_HBM; - for (const auto &n : compute_graph_->GetAllNodes()) { - OpDescPtr op_desc = n->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - bool attr_continuous = false; - bool get_continuous_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_NOPADDING_CONTINUOUS_OUTPUT, attr_continuous); - GE_IF_BOOL_EXEC(!get_continuous_flag, continue); - bool attr_reuse = false; - bool get_reuse_flag = ge::AttrUtils::GetBool(op_desc, ATTR_NAME_OUTPUT_REUSE_INPUT, attr_reuse); - GE_IF_BOOL_EXEC(!get_reuse_flag, continue); - - if (attr_reuse && attr_continuous) { - auto in_data_anchor_list = n->GetAllInDataAnchors(); - if (in_data_anchor_list.size() != kVirtualOutputNodeInputSize) { - // When current virtual node has several inputs, can't directly determine which input is the tensor for reuse. - std::string error = "Only one input is supported, current virtual node" + FmtToStr(n->GetName()) + - " has " + FmtToStr(in_data_anchor_list.size()) + " inputs."; - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GE_CHK_STATUS_RET(GetNodeMemoryType(n, memory_type, "output"), "Get node memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - GELOGD("Start to reassign memory for virtual output node, memory offset = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - string batch_label_string; - // Not all ops have ATTR_NAME_BATCH_LABEL, no need to check return value, only check out parameter - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string.empty()) { - size_t node_mem_offset = iter->second.mem_offset_; - // No ATTR_NAME_BATCH_LABEL, no need to reuse memory. - Status status = ReAssignVirtualOutputNodeMemory(n, node_mem_offset); - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output node failed, node name: %s.", n->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual output node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - } else { - // Has ATTR_NAME_BATCH_LABEL, for dynamic multi-batch node, need to reuse memory. - string current_node_full_name = op_desc->GetName(); - size_t pos = current_node_full_name.find(kMbatchNodeNameFlag); - if (pos == string::npos) { - std::string error = "Cannot find key string" + FmtToStr(kMbatchNodeNameFlag) + - " of multi-batch in name of virtual output node, the node name is " + FmtToStr(n->GetName()); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - string fixed_name = current_node_full_name.substr(0, pos); - vector parallel_virtual_output_nodes; - if (mem_reuse_virtual_output_nodes_map.count(fixed_name) != 0) { - parallel_virtual_output_nodes = mem_reuse_virtual_output_nodes_map[fixed_name]; - } - parallel_virtual_output_nodes.emplace_back(n); - mem_reuse_virtual_output_nodes_map[fixed_name] = parallel_virtual_output_nodes; - } + // If the output is ref type and refers to the ref of an input, the name of the output + // and the input are the same. Ge encounters ref type, finds matching relationship according + // to the names of input and output, and allocates the same memory address, eg: HCOMBroadcast + if (is_ref) { + GELOGI("Current node %s no needs assign continuous output because reference input by name.", + node->GetName().c_str()); + return SUCCESS; } + mem_offset = output_list[0]; } - int32_t mem_reuse_model = 1; - if (ReAssignVirtualNodesMemory(mem_reuse_virtual_output_nodes_map, mem_reuse_model) != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual output nodes failed."); - return FAILED; - } - return SUCCESS; -} - -Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, - int32_t mem_reuse_model) { - // Find max batch label value - string max_batch_label; - GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), - "Get max batch label failed."); - PrintMemoryOffset(); - vector nodes_mem_offset_list; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - int64_t memory_type = RT_MEMORY_HBM; - GE_CHK_STATUS_RET(GetNodeListMemoryType(virtual_nodes_list, mem_reuse_model, memory_type), - "Get node list memory type failed."); - auto iter = memory_offset_.find(memory_type); - if (iter == memory_offset_.end()) { - std::string error = "Memory offset does not have memory type" + FmtToStr(RT_MEMORY_HBM); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); + for (auto &out_data_anchor : node->GetAllOutDataAnchors()) { + output_list[out_data_anchor->GetIdx()] = mem_offset; + int64_t tensor_desc_size = 0; + int64_t nopadding_size = 0; + if (GetMemorySize(out_op_desc, out_op_desc->GetOutputDescPtr(out_data_anchor->GetIdx()), continuous_type, + tensor_desc_size, nopadding_size) != ge::SUCCESS) { return FAILED; } - size_t max_batch_node_mem_offset = iter->second.mem_offset_; - nodes_mem_offset_list.emplace_back(max_batch_node_mem_offset); - for (auto &i_node : virtual_nodes_list) { - // Op_desc is not nullptr, it has been checked. - OpDescPtr op_desc = i_node->GetOpDesc(); - string batch_label_string; - // All ops must have ATTR_NAME_BATCH_LABEL, no need to check return value. - (void) ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label_string); - if (batch_label_string == max_batch_label) { - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, max_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, max_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } - iter->second.mem_offset_ = max_batch_node_mem_offset; - AlignMemOffset(MEM_ALIGN_SIZE, memory_type); - GELOGD("After reassign memory for virtual node, align memory = %zu, memory type = %ld.", - iter->second.mem_offset_, memory_type); - // Only assign memory of max batch nodes. - break; - } - } - } - PrintMemoryOffset(); - size_t memory_reuse_index = 0; - for (auto &i_map : mem_reuse_nodes_map) { - vector virtual_nodes_list = i_map.second; - for (auto &i_node : virtual_nodes_list) { - size_t remaining_batch_node_mem_offset = nodes_mem_offset_list[memory_reuse_index]; - Status status = SUCCESS; - if (mem_reuse_model == kVirtualInputNodeMemoryReuse) { - status = ReAssignVirtualInputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else if (mem_reuse_model == kVirtualOutputNodeMemoryReuse) { - status = ReAssignVirtualOutputNodeMemory(i_node, remaining_batch_node_mem_offset); - } else { - std::string error = "Invalid parameter memory reuse model, which is " + FmtToStr(mem_reuse_model); - GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str()); - return FAILED; - } - - if (status != SUCCESS) { - GELOGE(FAILED, "Reassign memory of virtual node failed, node name: %s.", i_node->GetName().c_str()); - return FAILED; - } + if (is_nopadding) { + mem_offset += nopadding_size; + } else { + mem_offset += tensor_desc_size; + ge::AlignMemOffset(mem_offset); } - memory_reuse_index++; + GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" + " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), + node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), + output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, + is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); } - return SUCCESS; + out_op_desc->SetOutputOffset(output_list); + return ge::SUCCESS; } Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { @@ -943,10 +641,8 @@ Status GraphMemoryAssigner::ReAssignAtomicMemory(bool is_loop_graph) { } int64_t atomic_mem_size = static_cast(mem_iter->second.mem_offset_) - atomic_mem_start; - GE_CHECK_NOTNULL(mem_assigner_); - GE_CHECK_NOTNULL(mem_assigner_->GetPriorityAssinger()); - if ((atomic_mem_size != 0) && (iter_batch.first == mem_assigner_->GetPriorityAssinger()->GetMaxBatchLabel())) { - GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}), + if (atomic_mem_size != 0) { + GE_CHK_STATUS_RET(SetAtomicCleanAttr(iter.first, {atomic_mem_start}, {atomic_mem_size}, RT_MEMORY_HBM), "Failed to set attr for atomic addr clean node %s.", iter.first->GetName().c_str()); } } @@ -1084,7 +780,7 @@ Status GraphMemoryAssigner::AssignConnectNetOutputAtomicMemory(vector & } // All atomic nodes use atomic_addr_clean op independently, so we need to set the attr separately. - if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end) != SUCCESS) { + if (SetIndependentAtomicAttr(node, original_atomic_mem_start, mem_offset_end, RT_MEMORY_HBM) != SUCCESS) { GELOGE(FAILED, "Failed to set atomic attr separately."); return FAILED; } @@ -1231,9 +927,10 @@ Status GraphMemoryAssigner::AssignAtomicOutputMemory(const ge::NodePtr &node, ve output_list[output_index] = iter->second.mem_offset_; std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); - GELOGI("[IMAS]Atomic output : Set %s name[%s] output[%ld] offset to [%zu] stream_id[%ld] size[%ld] real_size[%ld]" - " batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), output_index, - iter->second.mem_offset_, op_desc->GetStreamId(), size, size, batch_label.c_str()); + GELOGI("[IMAS]Atomic output : Set %s name[%s] optype[%s] output[%ld] offset to [%zu] stream_id[%ld] memtype[%ld] " + "size[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), + node->GetType().c_str(), output_index, iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, + size, size, batch_label.c_str()); iter->second.mem_offset_ += size; AlignMemOffset(MEM_ALIGN_SIZE, RT_MEMORY_HBM); @@ -1309,10 +1006,10 @@ Status GraphMemoryAssigner::AssignOrdinaryAtomicWorkspaceMemory(const ge::OpDesc std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic ordinary workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " - "size[%ld] real_size[%ld] batch[%s].", - compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, + "[IMAS]Atomic ordinary workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "memtype[%ld] size[%ld] real_size[%ld] batch[%s].", + compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, + mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; @@ -1350,10 +1047,10 @@ Status GraphMemoryAssigner::AssignFusionAtomicWorkspaceMemory(const ge::OpDescPt std::string batch_label; (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label); GELOGI( - "[IMAS]Atomic fusion workspace : Set %s name[%s] workspace[%lu] offset to [%zu] stream_id[%ld] size[%ld] " - "real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), op_desc->GetName().c_str(), workspace_index, - mem_type_iter->second.mem_offset_, op_desc->GetStreamId(), workspace_size, workspace_size, - batch_label.c_str()); + "[IMAS]Atomic fusion workspace : Set %s name[%s] optype[%s] workspace[%lu] offset to [%zu] stream_id[%ld] " + "memtype[%ld] ssize[%ld] real_size[%ld] batch[%s].", compute_graph_->GetName().c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), workspace_index, mem_type_iter->second.mem_offset_, + op_desc->GetStreamId(), RT_MEMORY_HBM, workspace_size, workspace_size, batch_label.c_str()); mem_type_iter->second.mem_offset_ += workspace_size; mem_offset_end.emplace_back(mem_type_iter->second.mem_offset_); @@ -1429,7 +1126,7 @@ ge::Status GraphMemoryAssigner::SetInputOffset() { return FAILED; } for (auto pair : memory_offset_) { - GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memory type[%ld]", compute_graph_->GetName().c_str(), + GEEVENT("[IMAS]AfterAssignMemory : %s memoffset[%zu], memtype[%ld]", compute_graph_->GetName().c_str(), pair.second.mem_offset_, pair.first); } @@ -1598,7 +1295,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node) const { } Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const vector &mem_offset_end) { + const vector &mem_offset_end, int64_t memory_type) { GELOGD("Start to set independent atomic attr, atomic_addr_clean memory offset start is %ld", atomic_mem_start); // Parsing offset and size vectors @@ -1627,7 +1324,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in GELOGD("Current node memory_offset vector size is %zu, node name %s, node type is %s.", memory_offset_size.size(), peer_out_node_desc->GetName().c_str(), peer_out_node_desc->GetType().c_str()); if (peer_out_node_desc->GetType() == ATOMICADDRCLEAN) { - if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size) != SUCCESS) { + if (SetAtomicCleanAttr(peer_out_node, memory_offset_start, memory_offset_size, memory_type) != SUCCESS) { GELOGE(FAILED, "Set atomic clean attr failed."); return FAILED; } @@ -1638,7 +1335,7 @@ Status GraphMemoryAssigner::SetIndependentAtomicAttr(const ge::NodePtr &node, in } ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const vector &atomic_mem_start, - const vector &atomic_mem_size) { + const vector &atomic_mem_size, int64_t memory_type) { auto node_op_desc = node->GetOpDesc(); if (node_op_desc != nullptr) { GELOGD("Node %s, set atomic clean attr start.", node->GetName().c_str()); @@ -1677,9 +1374,10 @@ ge::Status GraphMemoryAssigner::SetAtomicCleanAttr(const NodePtr &node, const ve } string atomic_mem_size_str = ss.str(); - GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] output[0] offset to [%s] streamid[%ld] size[%s]", - node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), - atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), atomic_mem_size_str.c_str()); + GELOGI("[IMAS]SetAtomicCleanAttr : Set %s atomic_node name[%s] optype[%s] output[0] offset to [%s] streamid[%ld]" + " memtype[%ld] size[%s]",node->GetOwnerComputeGraph()->GetName().c_str(), node_op_desc->GetName().c_str(), + node->GetType().c_str(), atomic_mem_start_str.c_str(), node->GetOpDesc()->GetStreamId(), memory_type, + atomic_mem_size_str.c_str()); } return SUCCESS; } diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index def24287..a380e594 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -119,31 +119,15 @@ class GraphMemoryAssigner { /// ge::Status ReAssignContinuousMemory(bool is_loop_graph); - ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); - - ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); - - ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); - - ge::Status ReAssignVirtualNodesMemory(map> &mem_reuse_nodes_map, int32_t mem_reuse_model); - - ge::Status GetMaxBatchLabel(const map> &mem_reuse_virtual_nodes_map, - int32_t mem_reuse_model, string &max_batch_label); - - ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, - int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); - ge::Status ReAssignAtomicMemory(bool is_loop_graph); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type); + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); - ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); + ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); /// /// @brief check the input of node whether support atomic attr @@ -169,10 +153,10 @@ class GraphMemoryAssigner { ge::Status AssignConnectNetOutputAtomicMemory(vector &connect_netoutput_nodes); ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, - const std::vector &mem_offset_end); + const std::vector &mem_offset_end, int64_t memory_type); ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector &atomic_mem_start, - const std::vector &atomic_mem_size); + const std::vector &atomic_mem_size, int64_t memory_type); ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 2edc830d..8bd7d32e 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -234,6 +234,19 @@ Status TaskGenerator::SaveFusionNodes(map> &fusion return SUCCESS; } +bool TaskGenerator::IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const { + auto parent_graph_ptr = graph->GetParentGraph(); + if (parent_graph_ptr == nullptr) { + return false; + } + auto root_graph_ptr = GraphUtils::FindRootGraph(parent_graph_ptr); + if (root_graph_ptr == nullptr) { + return false; + } + + return root_graph_ptr->GetGraphUnknownFlag(); +} + Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &graph, vector &task_def_list, map &op_name_map) { GELOGD("Beign to generate task, graph name is %s.", graph->GetName().c_str()); @@ -274,7 +287,6 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); - uint64_t all_reduce_node_idx = 0; for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); @@ -293,7 +305,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra // Part2: Call auto fusion_task_info = FusionTaskInfo{run_context, graph, node, op_desc, node_index, ge_lib, - ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes, all_reduce_node_idx}; + ops_kernel_manager, task_def_list, op_name_map, profiling_point, all_reduce_nodes}; GE_CHK_STATUS_RET(GenerateTaskForFusionNode(fusion_task_info, fusion_nodes, fusion_nodes_seen), "Call GenerateTaskForFusionNode node:%s(%s) failed", name.c_str(), type.c_str()); // continue directly @@ -317,8 +329,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra type.c_str()); // Profiling task size_t task_list_size_before = task_def_list.size(); - GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); int64_t op_id = op_desc->GetId(); // Compatible with dynamic shape scenes, the default is 0 int64_t stream_id = 0; @@ -338,8 +349,7 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra return ret; } // Profiling task - GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_node_idx)); + GE_CHK_STATUS_RET(InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); size_t task_list_size_after = task_def_list.size(); // If tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -382,7 +392,6 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info auto &op_name_map = fusion_task_info.op_name_map; auto &profiling_point = fusion_task_info.profiling_point; auto &all_reduce_nodes = fusion_task_info.all_reduce_nodes; - auto &all_reduce_idx = fusion_task_info.all_reduce_node_idx; // If op_desc have this attr, call nodes with same group key in a stream together if (ge::AttrUtils::GetInt(fusion_op_desc, ATTR_NAME_FUSION_GROUP_KEY, group_key) && (fusion_nodes_seen.count(node.get()) == 0)) { @@ -429,8 +438,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return INTERNAL_ERROR; } // profiling task - (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); run_context.stream = run_context.graphStreamList[stream_id]; GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld] task.", op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id); @@ -443,8 +451,7 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } // profiling task - (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, - node_index, task_def_list, all_reduce_idx); + (void)InsertProfilingTaskAfter(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list); size_t task_list_size_after = task_def_list.size(); // if tasks is reduced if (task_list_size_after < task_list_size_before) { @@ -526,6 +533,13 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { return GE_GRAPH_GRAPH_NODE_NULL; } + int64_t node_index = 0; + for (auto &node : all_nodes) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + op_desc->SetId(node_index++); + } + map> all_stream_ops; for (auto &node : all_nodes) { OpDescPtr op_desc = node->GetOpDesc(); @@ -673,7 +687,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP } } if (graph->GetNeedIteration()) { - if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { + if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { profiling_point.end_index.insert(current_idx); GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", op_desc->GetName().c_str(), current_idx); @@ -842,6 +856,13 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi GELOGD("Profiling is not open."); return SUCCESS; } + + // subgraph of dynamic graph no need to find index, has been found in parent graph + if (IsSubGraphOfDynamicGraph(graph)) { + GELOGI("Graph[%s] is subgraph of dynamic graph, no nned to find index.", graph->GetName().c_str()); + return SUCCESS; + } + GELOGI("Start get FP/BP index."); std::string fp_point_str; std::string bp_point_str; @@ -879,9 +900,47 @@ Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, Profi return SUCCESS; } +Status TaskGenerator::InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { + bool is_insert_all_reduce_task = false; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + is_insert_all_reduce_task = true; + } + if (!is_insert_all_reduce_task) { + for (size_t i = 0; i < all_reduce_nodes.size(); i++) { + if (all_reduce_nodes[i] == node_index) { + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArStartLogid; + is_insert_all_reduce_task = true; + break; + } + } + } + + if (is_insert_all_reduce_task) { + GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); + TaskDef ar_task_def; + ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); + ar_task_def.set_stream_id(op_desc->GetStreamId()); + LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); + if (ar_log_def != nullptr) { + ar_log_def->set_logid(ar_log_id); + ar_log_def->set_notify(false); + } + task_def_list.push_back(ar_task_def); + } + + return SUCCESS; +} + Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t &all_reduce_node_idx) { + vector &task_def_list) { const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || ProfilingManager::Instance().ProfilingTrainingTraceOn(); @@ -924,19 +983,31 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); - uint64_t all_reduce_task_idx = 0; + if (is_all_reduce) { + (void)InsertProfilingArTaskBefore(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); + } + + return SUCCESS; +} + +Status TaskGenerator::InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task) { bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; + int64_t ar_log_id = 0xFFFF; + if (is_insert_bp_profiling_task) { + (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, ar_log_id); + ar_log_id += 1; is_insert_all_reduce_task = true; } - if (is_all_reduce) { - all_reduce_node_idx++; - } if (!is_insert_all_reduce_task) { for (size_t i = 0; i < all_reduce_nodes.size(); i++) { if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; + GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(i, kProfilingArStep), + GELOGE(FAILED, "Multiply result is out of range."); + return FAILED); + ar_log_id = i * kProfilingArStep + kProfilingArEndLogid; is_insert_all_reduce_task = true; break; } @@ -944,28 +1015,24 @@ Status TaskGenerator::InsertProfilingTaskBefore(const OpDescPtr &op_desc, const } if (is_insert_all_reduce_task) { - GELOGI("The start allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); + GELOGI("The start allreduce operator is %s, idx %u, log_id %ld", op_desc->GetName().c_str(), node_index, ar_log_id); TaskDef ar_task_def; ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); ar_task_def.set_stream_id(op_desc->GetStreamId()); LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); if (ar_log_def != nullptr) { - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArStartLogid; - ar_log_def->set_logid(log_id); + ar_log_def->set_logid(ar_log_id); ar_log_def->set_notify(false); - (void)ge::AttrUtils::SetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); } task_def_list.push_back(ar_task_def); } + return SUCCESS; } Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, vector &all_reduce_nodes, uint32_t node_index, - vector &task_def_list, uint64_t all_reduce_node_idx) { + vector &task_def_list) { GE_CHECK_NOTNULL(op_desc); const char *profiling_mode = std::getenv(kProfilingMode); bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || @@ -1010,36 +1077,11 @@ Status TaskGenerator::InsertProfilingTaskAfter(const OpDescPtr &op_desc, const P task_def_list.emplace_back(end_task_def); } - uint32_t all_reduce_task_idx = 0; - bool is_insert_all_reduce_task = false; - if (is_all_reduce && is_insert_bp_profiling_task) { - all_reduce_task_idx = all_reduce_node_idx; - is_insert_all_reduce_task = true; - } - - for (size_t i = 0; i < all_reduce_nodes.size(); i++) { - if (all_reduce_nodes[i] == node_index) { - all_reduce_task_idx = i; - is_insert_all_reduce_task = true; - break; - } + if (is_all_reduce) { + (void)InsertProfilingArTaskAfter(op_desc, all_reduce_nodes, node_index, + task_def_list, is_insert_bp_profiling_task); } - if (is_insert_all_reduce_task) { - GELOGI("The end allreduce operator is %s, idx %u", op_desc->GetName().c_str(), node_index); - TaskDef ar_task_def; - ar_task_def.set_type(RT_MODEL_TASK_PROFILER_TRACE); - ar_task_def.set_stream_id(op_desc->GetStreamId()); - LogTimeStampDef *ar_log_def = ar_task_def.mutable_log_timestamp(); - GE_CHECK_NOTNULL(ar_log_def); - GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(all_reduce_task_idx, kProfilingArStep), - GELOGE(FAILED, "Multiply result is out of range."); - return FAILED); - auto log_id = all_reduce_task_idx * kProfilingArStep + kProfilingArEndLogid; - ar_log_def->set_logid(log_id); - ar_log_def->set_notify(false); - task_def_list.emplace_back(ar_task_def); - } return SUCCESS; } diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index 5970954c..9f12d568 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -129,10 +129,16 @@ class TaskGenerator { std::vector &all_reduce_nodes) const; Status InsertProfilingTaskBefore(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes, uint32_t node_index, - std::vector &task_def_list, uint64_t &all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskBefore(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_listy, + bool is_insert_bp_profiling_task); Status InsertProfilingTaskAfter(const OpDescPtr &op_desc, const ProfilingPoint &profiling_point, std::vector &all_reduce_nodes, uint32_t node_index, - std::vector &task_def_list, uint64_t all_reduce_node_idx); + std::vector &task_def_list); + Status InsertProfilingArTaskAfter(const OpDescPtr &op_desc, std::vector &all_reduce_nodes, + uint32_t node_index, std::vector &task_def_list, + bool is_insert_bp_profiling_task); static bool IsProfPoint(const OpDescPtr &op, const std::string &name); @@ -155,6 +161,8 @@ class TaskGenerator { Status SetKnownShapeStream(RunContext &run_context, int64_t stream_id); + bool IsSubGraphOfDynamicGraph(const ComputeGraphPtr &graph) const; + uint8_t *var_mem_base_ = nullptr; uint64_t var_mem_size_ = 0; }; diff --git a/ge/graph/load/model_manager/data_dumper.cc b/ge/graph/load/model_manager/data_dumper.cc index 947aac1d..235cffa9 100644 --- a/ge/graph/load/model_manager/data_dumper.cc +++ b/ge/graph/load/model_manager/data_dumper.cc @@ -820,6 +820,7 @@ Status DataDumper::UnloadDumpInfo() { for (const auto &op_iter : op_list_) { aicpu::dump::Task task; task.set_task_id(op_iter.task_id); + task.set_stream_id(op_iter.stream_id); op_mapping_info.mutable_task()->Add(std::move(task)); } auto ret = ExecuteUnLoadDumpInfo(op_mapping_info); @@ -834,7 +835,6 @@ void DataDumper::DumpShrink() { compute_graph_.reset(); input_map_.clear(); ref_info_.clear(); - op_list_.clear(); } void DataDumper::PrintCheckLog(string &dump_list_key) { diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 136a041c..be33588a 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -446,20 +446,23 @@ void DavinciModel::InitRuntimeParams() { runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); } -void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { - const set hcom_opp_types({ - HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, - HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE - }); - +void DavinciModel::CheckHasHcomOp() { + Graph graph = ge_model_->GetGraph(); + auto compute_graph = GraphUtils::GetComputeGraph(graph); + if (compute_graph == nullptr) { + return; + } for (const auto &node : compute_graph->GetAllNodes()) { OpDescPtr op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); - if (hcom_opp_types.count(op_desc->GetType()) > 0) { - uint32_t stream_id = static_cast(op_desc->GetStreamId()); - hcom_streams_.emplace(stream_id); - GELOGD("hcom stream: %u.", stream_id); - } + GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || + (op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || + (op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || + (op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || + (op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || + (op_desc->GetType() == HCOMREDUCE)), + uint32_t stream_id = static_cast(op_desc->GetStreamId()); + (void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); } } @@ -621,6 +624,7 @@ void DavinciModel::OpDebugUnRegister() { // initialize op sequence and call initialization function of each op respectively Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { // validating params + GELOGI("Priority is %d", priority_); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(priority_ < 0 || priority_ > 7, return PARAM_INVALID, "Priority must between 0-7, now is %d", priority_); GE_CHK_BOOL_RET_STATUS(ge_model_ != nullptr, PARAM_INVALID, "GeModel is null."); @@ -638,7 +642,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size name_ = ge_model_->GetName(); (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); - CheckHasHcomOp(compute_graph); + CheckHasHcomOp(); vector huge_stream_list; (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); @@ -1024,7 +1028,7 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ const vector &output_op_list) { GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); for (auto &item : data_by_index) { - const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); + auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); input_addrs_list_.emplace_back(output_addrs); @@ -1032,18 +1036,14 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); - GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); if (item.second->GetType() == AIPP_DATA_TYPE) { GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); is_dynamic_aipp_ = true; } } - vector out_node_name; - (void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); - GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); for (const auto &op_desc : output_op_list) { - const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); + auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); output_addrs_list_.emplace_back(input_addrs); @@ -1061,11 +1061,10 @@ Status DavinciModel::GenInputOutputInfo(const map &data_by_ if (InitOutputTensorInfo(op_desc) != SUCCESS) { return INTERNAL_ERROR; } - - GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); } - return SUCCESS; + GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); + return InitOutputDescInfo(output_op_list); } bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { @@ -1810,16 +1809,16 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector &user_inp /// Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { - GELOGW("there is not AIPP related with index %u.", index); + GELOGW("There is not AIPP related with index %u.", index); return SUCCESS; } domi::AippOpParams aipp_params; GeAttrValue::NAMED_ATTRS aipp_attr; - GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); - GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", + GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); AippConfigInfo aipp_info; @@ -1981,24 +1980,27 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, } } -Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { - GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); +Status DavinciModel::InitInputDescInfo(const map &data_by_index) { + for (const auto &item : data_by_index) { + const auto op_desc = item.second; + GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); - InputOutputDescInfo input; - ShapeDescription dims_info; - Format format = op_desc->GetInputDescPtr(0)->GetFormat(); - CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); + InputOutputDescInfo input; + ShapeDescription dims_info; + Format format = op_desc->GetInputDescPtr(0)->GetFormat(); + CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); - input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); - input.name = op_desc->GetName(); - int64_t input_size = 0; - GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); - input.size = input_size; - input_formats_.push_back(format); - input_descs_.push_back(input); + input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); + input.name = op_desc->GetName(); + int64_t input_size = 0; + GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); + input.size = input_size; + input_formats_.push_back(format); + input_descs_.push_back(input); - input.shape_info = dims_info; - input_descs_dims_.push_back(input); + input.shape_info = dims_info; + input_descs_dims_.push_back(input); + } return SUCCESS; } @@ -2064,31 +2066,37 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); } -Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name) { - uint32_t out_size = static_cast(op_desc->GetInputsSize()); - for (uint32_t i = 0; i < out_size; ++i) { - string output_name; - InputOutputDescInfo output; - uint32_t format_result; - CreateOutput(i, op_desc, output, format_result); - - std::vector src_name = op_desc->GetSrcName(); - std::vector src_index = op_desc->GetSrcIndex(); - GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, - "construct output_name failed."); - // forward compatbility, if old om has no out_node_name, need to return output follow origin way - if (out_size == out_node_name.size()) { - // neweast plan, the index will add to name during generate model. - bool contains_colon = out_node_name[i].find(":") != std::string::npos; - output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); - } else { - output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); +Status DavinciModel::InitOutputDescInfo(const vector &output_op_list) { + GELOGD("Output node size: %zu", output_op_list.size()); + vector out_node_name; + (void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); + for (const auto &op_desc : output_op_list) { + uint32_t out_size = static_cast(op_desc->GetInputsSize()); + for (uint32_t index = 0; index < out_size; index++) { + string output_name; + InputOutputDescInfo output; + uint32_t format_result; + CreateOutput(index, op_desc, output, format_result); + + std::vector src_name = op_desc->GetSrcName(); + std::vector src_index = op_desc->GetSrcIndex(); + GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, + "construct output_name failed."); + // forward compatbility, if old om has no out_node_name, need to return output follow origin way + if (out_size == out_node_name.size()) { + // neweast plan, the index will add to name during generate model. + bool contains_colon = out_node_name[index].find(":") != std::string::npos; + output_name = + contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]); + } else { + output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + + std::to_string(src_index[index]); + } + output.name = output_name; + output_descs_.push_back(output); + output_formats_.push_back(format_result); } - output.name = output_name; - output_descs_.push_back(output); - output_formats_.push_back(format_result); } - return SUCCESS; } @@ -2470,7 +2478,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r uint64_t buffer_length = buffer.length; void *buffer_addr = reinterpret_cast(reinterpret_cast(buffer.data)); - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", + GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); idx++; @@ -3959,8 +3967,11 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map void *{ + if (known_node_) { + data_dumper_.SetLoopAddr(known_shape_global_step_, nullptr, nullptr); + } else { + // set loop count addr + auto get_var_addr = [&](const string &name) -> void *{ const auto it = variable_by_name.find(name); if (it != variable_by_name.end()) { const auto output_sizes = ModelUtils::GetOutputSize(it->second); @@ -3973,10 +3984,10 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map &outputs); - Status InitInputDescInfo(const OpDescPtr &op_desc); - Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector &out_node_name); + Status InitInputDescInfo(const map &data_by_index); + Status InitOutputDescInfo(const vector &output_op_list); Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); @@ -1057,6 +1061,9 @@ class DavinciModel { vector input_formats_; vector output_descs_; vector output_formats_; + + // known shape node for dump + void *known_shape_global_step_; }; } // namespace ge #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ diff --git a/ge/graph/load/model_manager/model_manager.cc b/ge/graph/load/model_manager/model_manager.cc index c424a60b..4eb3254b 100755 --- a/ge/graph/load/model_manager/model_manager.cc +++ b/ge/graph/load/model_manager/model_manager.cc @@ -1428,7 +1428,7 @@ Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &me uint8_t *model_data = nullptr; uint32_t model_len = 0; Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); - GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "parse model content failed!"); + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_PARAM_INVALID, "parse model content failed!"); OmFileLoadHelper om_file_helper; ret = om_file_helper.Init(model_data, model_len); diff --git a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc index c34a4e9a..6da1bf63 100644 --- a/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc +++ b/ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc @@ -192,7 +192,7 @@ void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) { if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc->GetName())) { dump_flag_ = RT_KERNEL_DUMPFLAG; - dump_args_ = input_output_addr_; + dump_args_ = addr; } } diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index d6027a08..bfef4001 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -100,14 +100,14 @@ Status CachingAllocator::Initialize(uint32_t device_id) { } auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator); if (bin_ptr == nullptr) { - GELOGE(ge::FAILED, "Alloc BlockBin failed."); - return ge::FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } free_block_bins_[i] = bin_ptr; } memory_allocator_ = MemManager::Instance(memory_type_); if (memory_allocator_ == nullptr) { - return ge::FAILED; + return ACL_ERROR_GE_INTERNAL_ERROR; } return ge::SUCCESS; } diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 55e26cf9..410611b0 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -730,7 +730,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId()); GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph); GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph); - GM_RUN_AND_DUMP_PERF("OptimizeBeforeBuildForRts", stages.optimizer.OptimizeGraphBeforeBuildForRts, compute_graph); + GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts", + GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts, + compute_graph); Status ret = compute_graph->TopologicalSorting(); if (ret != SUCCESS) { diff --git a/ge/graph/manager/graph_mem_allocator.cc b/ge/graph/manager/graph_mem_allocator.cc index f3037299..428b08ae 100755 --- a/ge/graph/manager/graph_mem_allocator.cc +++ b/ge/graph/manager/graph_mem_allocator.cc @@ -64,9 +64,10 @@ uint8_t *MemoryAllocator::MallocMemory(const string &purpose, size_t memory_size Status MemoryAllocator::FreeMemory(uint8_t *memory_addr, uint32_t device_id) const { GELOGI("MemoryAllocator::FreeMemory device_id = %u", device_id); - if (rtFree(memory_addr) != RT_ERROR_NONE) { - GELOGE(ge::INTERNAL_ERROR, "MemoryAllocator::MallocMemory device_id = %u", device_id); - return ge::INTERNAL_ERROR; + auto rtRet = rtFree(memory_addr); + if (rtRet != RT_ERROR_NONE) { + GELOGE(rtRet, "MemoryAllocator::MallocMemory device_id = %u", device_id); + return RT_ERROR_TO_GE_STATUS(rtRet); } memory_addr = nullptr; return ge::SUCCESS; @@ -168,31 +169,36 @@ Status MemManager::Initialize(const std::vector &memory_type) { memory_allocator_map_[index] = memory_allocator; GELOGI("Create MemoryAllocator memory type[%u] success.", index); } else { - GELOGE(ge::INTERNAL_ERROR, "Alloc MemoryAllocator failed."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc MemoryAllocator failed."); } } else { memory_allocator = it->second; } if (memory_allocator == nullptr) { - GELOGE(ge::INTERNAL_ERROR, "Create MemoryAllocator failed."); - return ge::INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create MemoryAllocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } else { memory_allocator->Initialize(0); } } - if (InitAllocator(memory_type, caching_allocator_map_) != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "Create CachingAllocator failed."); - return ge::INTERNAL_ERROR; + auto ret = InitAllocator(memory_type, caching_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create CachingAllocator failed."); + return ret; } - if (InitAllocator(memory_type, rdma_allocator_map_) != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "Create RdmaAllocator failed."); - return ge::INTERNAL_ERROR; + + ret = InitAllocator(memory_type, rdma_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create RdmaAllocator failed."); + return ret; } - if (InitAllocator(memory_type, host_allocator_map_) != SUCCESS) { - GELOGE(ge::INTERNAL_ERROR, "Create HostMemAllocator failed."); - return ge::INTERNAL_ERROR; + + ret = InitAllocator(memory_type, host_allocator_map_); + if (ret != SUCCESS) { + GELOGE(ret, "Create HostMemAllocator failed."); + return ret; } return SUCCESS; } @@ -229,7 +235,7 @@ MemoryAllocator *MemManager::GetMemoryAllocator(rtMemType_t memory_type) { // Usually impossible if (memory_allocator == nullptr) { - GELOGE(ge::INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "GetMemoryAllocator failed, memory type is %u.", memory_type); static MemoryAllocator default_memory_allocator(RT_MEMORY_RESERVED); return &default_memory_allocator; } diff --git a/ge/graph/manager/graph_mem_allocator.h b/ge/graph/manager/graph_mem_allocator.h index bd75dbb9..d3468e75 100644 --- a/ge/graph/manager/graph_mem_allocator.h +++ b/ge/graph/manager/graph_mem_allocator.h @@ -192,18 +192,18 @@ class MemManager { allocate_map[index] = allocator; GELOGI("Create Allocator memory type[%u] success.", index); } else { - GELOGE(INTERNAL_ERROR, "Alloc Allocator failed."); + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc Allocator failed."); } } else { allocator = it->second; } if (allocator == nullptr) { - GELOGE(INTERNAL_ERROR, "Create Allocator failed."); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create Allocator failed."); + return ACL_ERROR_GE_MEMORY_ALLOCATION; } else { if (allocator->Initialize() != SUCCESS) { - return INTERNAL_ERROR; + return ACL_ERROR_GE_INTERNAL_ERROR; } } } diff --git a/ge/graph/manager/rdma_pool_allocator.cc b/ge/graph/manager/rdma_pool_allocator.cc index 93d1fd1d..ed243801 100644 --- a/ge/graph/manager/rdma_pool_allocator.cc +++ b/ge/graph/manager/rdma_pool_allocator.cc @@ -51,7 +51,7 @@ RdmaPoolAllocator::RdmaPoolAllocator(rtMemType_t memory_type) Status RdmaPoolAllocator::Initialize() { memory_allocator_ = MemManager::Instance(memory_type_); if (memory_allocator_ == nullptr) { - return ge::FAILED; + return ACL_ERROR_GE_INTERNAL_ERROR; } return ge::SUCCESS; } diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc index 1c82eaf3..2a60765f 100755 --- a/ge/graph/partition/dynamic_shape_partition.cc +++ b/ge/graph/partition/dynamic_shape_partition.cc @@ -51,6 +51,13 @@ using ClusterPtr = std::shared_ptr; static bool IsInExperimentalMode(const ComputeGraphPtr &root_graph) { for (const auto &node : root_graph->GetAllNodes()) { GE_CHECK_NOTNULL(node->GetOpDesc()); + // not do partition in single op scene. + bool is_singleop = false; + (void)AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_singleop); + if (is_singleop) { + return false; + } + for (const auto &input_desc : node->GetOpDesc()->GetAllInputsDesc()) { auto type = input_desc.GetDataType(); if (type == DT_STRING || type == DT_RESOURCE || type == DT_STRING_REF) { diff --git a/ge/graph/passes/common_subexpression_elimination_pass.cc b/ge/graph/passes/common_subexpression_elimination_pass.cc index 7d9724fc..3587b03e 100644 --- a/ge/graph/passes/common_subexpression_elimination_pass.cc +++ b/ge/graph/passes/common_subexpression_elimination_pass.cc @@ -26,9 +26,6 @@ namespace ge { namespace { -std::set un_compute_attrs = { - {ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES}, -}; std::string GetCseKey(const NodePtr &node) { std::stringstream ss; @@ -53,7 +50,7 @@ std::string GetCseKey(const NodePtr &node) { ss << name << "-"; } - ss << "attrs-" << AttrUtils::GetAttrsStrAfterRid(node->GetOpDesc(), un_compute_attrs); + ss << "attrs-" << AttrUtils::GetAllAttrsStr(node->GetOpDesc()); return ss.str(); } diff --git a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc index 6fa63642..293fd132 100644 --- a/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc +++ b/ge/graph/passes/dynamic_single_op_reset_shape_pass.cc @@ -58,9 +58,9 @@ Status DynamicSingleOpResetShapePass::Run(ComputeGraphPtr graph) { continue; } - // pass node without attr: ATTR_DYNAMIC_SHAPE_SINGLE_AICPU + // pass node without attr: ATTR_SINGLE_OP_SCENE bool single_aicpu_unknown = false; - if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, single_aicpu_unknown) || + if (!AttrUtils::GetBool(node->GetOpDesc(), ATTR_SINGLE_OP_SCENE, single_aicpu_unknown) || !single_aicpu_unknown) { continue; } diff --git a/ge/graph/passes/variable_op_pass_bak.cc b/ge/graph/passes/variable_op_pass_bak.cc deleted file mode 100644 index c9218296..00000000 --- a/ge/graph/passes/variable_op_pass_bak.cc +++ /dev/null @@ -1,811 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "graph/passes/variable_op_pass.h" -#include -#include - -#include "common/formats/formats.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/ge_context.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/utils/graph_utils.h" -#include "graph/utils/tensor_utils.h" -#include "graph/utils/type_utils.h" - -namespace ge { -namespace { -const int kTransOpOutIndex = 0; - -Status ByPassTransNode(NodePtr &front_node, NodePtr &back_node) { - GE_CHECK_NOTNULL(front_node); - GE_CHECK_NOTNULL(back_node); - GELOGD("Begin to bypass trans node %s", front_node->GetName().c_str()); - auto ret = GraphUtils::CopyInCtrlEdges(front_node, back_node); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to move control edges from trans " - "node %s to var-ref %s", - front_node->GetName().c_str(), back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - auto back_node_in_anchor = back_node->GetInDataAnchor(0); - if (back_node_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "The back node %s does not have an " - "input anchor", - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - back_node_in_anchor->UnlinkAll(); - auto trans_in_anchor = front_node->GetInDataAnchor(0); - if (trans_in_anchor == nullptr) { - GELOGE(INTERNAL_ERROR, - "Failed to get the in data anchor from trans" - " node %s type %s", - front_node->GetName().c_str(), front_node->GetType().c_str()); - return INTERNAL_ERROR; - } - auto prev_trans_node_out_anchor = trans_in_anchor->GetPeerOutAnchor(); - if (prev_trans_node_out_anchor == nullptr) { - GELOGW( - "The trans node %s does not have an input, so the ref node %s does" - " not have any inputs after bypass", - front_node->GetName().c_str(), front_node->GetName().c_str()); - } else { - ret = GraphUtils::AddEdge(prev_trans_node_out_anchor, back_node_in_anchor); - if (ret != GRAPH_SUCCESS) { - GELOGE(INTERNAL_ERROR, - "Failed to add edge between ref node %s " - "and the prev node of trans node %s", - back_node->GetName().c_str(), front_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } - return SUCCESS; -} - -bool IsTransSupport(const TransNodeInfo &trans_info) { - if (trans_info.output.GetShape().IsUnknownShape()) { - return false; - } - if (trans_info.node_type == RESHAPE || trans_info.node_type == REFORMAT) { - return true; - } else if (trans_info.node_type == TRANSDATA || trans_info.node_type == TRANSPOSED) { - formats::TransArgs args{nullptr, - trans_info.input.GetFormat(), - trans_info.output.GetFormat(), - trans_info.input.GetShape().GetDims(), - trans_info.output.GetShape().GetDims(), - trans_info.input.GetDataType()}; - return formats::IsTransFormatSupport(args); - } else if (trans_info.node_type == CAST) { - formats::CastArgs datatype_args{nullptr, static_cast(trans_info.input.GetShape().GetShapeSize()), - trans_info.input.GetDataType(), trans_info.output.GetDataType()}; - return formats::IsTransDataTypeSupport(datatype_args); - } else { - return false; - } -} - -std::string GetInAndOutDecsDiff(NodePtr &trans_node, bool reverse = false) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto op_desc = trans_node->GetOpDesc(); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - if (reverse) { - GeTensorDesc tmp_desc = input_desc; - input_desc = output_desc; - output_desc = tmp_desc; - } - auto input_format = input_desc.GetFormat(); - auto input_type = input_desc.GetDataType(); - auto input_shape = input_desc.GetShape(); - auto output_format = output_desc.GetFormat(); - auto output_type = output_desc.GetDataType(); - auto output_shape = output_desc.GetShape(); - std::stringstream diff_key; - diff_key.str(""); - if (input_format != output_format) { - diff_key << static_cast(input_format) << '-' << static_cast(output_format) << '-'; - } else { - diff_key << "*-"; - } - if (input_type != output_type) { - diff_key << static_cast(input_type) << '-' << static_cast(output_type) << '-'; - } else { - diff_key << "*-"; - } - if (!ge::formats::IsShapeEqual(input_shape, output_shape)) { - for (auto dim : input_shape.GetDims()) { - diff_key << dim << '-'; - } - for (auto dim : output_shape.GetDims()) { - diff_key << dim << '-'; - } - } else { - diff_key << "*"; - } - return diff_key.str(); -} -} // namespace - -Status VariableOpPass::Run(ge::ComputeGraphPtr graph) { - if (graph == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run variable op pass, null graph"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to run variable op pass on graph %s, session %lu, graph id %u", graph->GetName().c_str(), - GetContext().SessionId(), graph->GetGraphID()); - - if (var_accelerate_ctrl_ == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to run var op pass, the variable accelerate control is null"); - return INTERNAL_ERROR; - } - - GELOGD("Begin to generate ref map for variable and refs, graph name:%s.", graph->GetName().c_str()); - if (RenewVarDesc(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew var desc on graph"); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - GELOGD("Begin to fusion variables and trans nodes"); - for (auto &var_to_refs : var_and_var_ref_map_) { - auto &node = var_to_refs.first; - GE_CHECK_NOTNULL(node); - GE_CHECK_NOTNULL(var_accelerate_ctrl_); - if (!var_accelerate_ctrl_->IsVarPermitToChangeFormats(node->GetName())) { - GELOGD("The var %s does not permit to change formats, skip it", node->GetName().c_str()); - continue; - } - - VarTransRoad fusion_road; - auto ret = FusionIfNeed(node, fusion_road); - if (ret != SUCCESS) { - return ret; - } - - if (fusion_road.empty()) { - GELOGD("No need to fusion variable %s because it's fusion road is empty", node->GetName().c_str()); - continue; - } - - ret = RenewTransRoadDesc(node, fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to renew description fusion road for var %s", node->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - auto start_iter = fusion_road.begin(); - auto end_iter = fusion_road.rbegin(); - GELOGD( - "Trans variable data for %s from format %s to %s, shape %s to %s " - "data-type %s to %s, path len %zu success", - node->GetName().c_str(), TypeUtils::FormatToSerialString(start_iter->input.GetFormat()).c_str(), - TypeUtils::FormatToSerialString(end_iter->output.GetFormat()).c_str(), - formats::ShapeToString(start_iter->input.GetShape().GetDims()).c_str(), - formats::ShapeToString(end_iter->output.GetShape().GetDims()).c_str(), - TypeUtils::DataTypeToSerialString(start_iter->input.GetDataType()).c_str(), - TypeUtils::DataTypeToSerialString(end_iter->output.GetDataType()).c_str(), fusion_road.size()); - - ret = VarManager::Instance(graph->GetSessionID())->SetTransRoad(node->GetName(), fusion_road); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the format fusion road for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - ret = VarManager::Instance(graph->GetSessionID())->SetChangedGraphId(node->GetName(), graph->GetGraphID()); - if (ret != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to update the graph id for var %s", node->GetName().c_str()); - return INTERNAL_ERROR; - } - var_accelerate_ctrl_->SetVarChanged(node->GetName()); - - GELOGD("Begin to update format info for var %s.", node->GetName().c_str()); - std::set node_set({node}); - if (UpdateIOFormatInfo(end_iter->output, node_set) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - // renew var desc if the trans_road is all reshape or reformat - ret = RenewVarDesc(graph->GetSessionID(), node, fusion_road); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - - return SUCCESS; -} - -Status VariableOpPass::RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road) { - auto var_desc = var->GetOpDesc(); - GE_CHECK_NOTNULL(var_desc); - TransNodeInfo prev_node_info; - prev_node_info.node_type = var->GetType(); - prev_node_info.output = var_desc->GetOutputDesc(0); - // two cases - // fisrt Var->cast->transdata which transdata in fusion road - // the input of transdata is not equal with output of var - // case 1 : suppose input dtype of transdata equal with out dtype - // but not equal with var - // so we make input dtype and output dytpe of transroad equal with var - // case 2: suppose input format of transdata not equal with out format - // and input format not equal with var - // so we make input format equal with var - for (auto &cur_trans : fusion_road) { - if (cur_trans.input.GetFormat() == cur_trans.output.GetFormat()) { - cur_trans.output.SetFormat(prev_node_info.output.GetFormat()); - } - if (cur_trans.input.GetDataType() == cur_trans.output.GetDataType()) { - cur_trans.output.SetDataType(prev_node_info.output.GetDataType()); - } - if (ge::formats::IsShapeEqual(cur_trans.input.GetShape(), cur_trans.output.GetShape())) { - cur_trans.output.SetShape(prev_node_info.output.GetShape()); - } - cur_trans.input = prev_node_info.output; - prev_node_info.output = cur_trans.output; - } - return SUCCESS; -} - -Status VariableOpPass::FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road) { - bool can_fusion = false; - while (true) { - map> trans_type_to_trans_ops ; - map> trans_type_to_changed_desc; - // record the order of trans op in first path - vector first_path_trans_order; - auto ret = CheckIfCouldBeOptimized(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, can_fusion); - if (ret != SUCCESS) { - GELOGE(FAILED, "Check trans ops after vatiable could be optimized or not failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - vector> delete_var_ref_trans_nodes; - ret = GetAndCheckTransOpOfVarRef(var, can_fusion, trans_type_to_changed_desc, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - GELOGE(FAILED, "get and check trans op of varref failed"); - return ret; - } - - if (!can_fusion) { - break; - } - - ret = UpdateTransRoad(fusion_road, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Update trans road failed"); - return ret; - } - - if (fusion_road.empty()) { - return SUCCESS; - } - - ret = DealFusion(var, fusion_road, trans_type_to_changed_desc, - trans_type_to_trans_ops, delete_var_ref_trans_nodes); - if (ret != SUCCESS) { - return ret; - } - } - return SUCCESS; -} - -Status VariableOpPass::UpdateTransRoad(VarTransRoad &fusion_road, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops){ - vector delete_trans_type; - for (auto &trans_type : first_path_trans_order) { - if (trans_type_to_changed_desc.find(trans_type) == trans_type_to_changed_desc.end()) { - continue; - } - bool delete_flag = false; - for (auto &trans_node : trans_type_to_trans_ops[trans_type]) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - auto out_op_desc = trans_node->GetOpDesc(); - GE_CHECK_NOTNULL(out_op_desc); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - if (!IsTransSupport(trans_node_info)) { - delete_flag = true; - GELOGD("The trans node %s does not support, skip the variable accelerating", trans_node_info.node_type.c_str()); - break; - } - } - if (delete_flag) { - delete_trans_type.push_back(trans_type); - } else { - auto &trans_node = *trans_type_to_trans_ops[trans_type].begin(); - auto out_op_desc = trans_node->GetOpDesc(); - int tran_in_index = TransOpUtil::GetTransOpDataIndex(trans_node->GetType()); - TransNodeInfo trans_node_info; - trans_node_info.node_type = trans_node->GetType(); - trans_node_info.input = out_op_desc->GetInputDesc(tran_in_index); - trans_node_info.output = out_op_desc->GetOutputDesc(kTransOpOutIndex); - fusion_road.emplace_back(trans_node_info); - } - } - for (auto &trans_type : delete_trans_type) { - trans_type_to_changed_desc.erase(trans_type); - } - return SUCCESS; -} - -Status VariableOpPass::DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes) { - GE_CHECK_NOTNULL(var_node); - GELOGD("Begin to fusion var %s with trans", var_node->GetName().c_str()); - auto graph = var_node->GetOwnerComputeGraph(); - for (auto &trans_type : trans_type_to_changed_desc) { - for (auto &trans_node : trans_type_to_trans_ops[trans_type.first]) { - GELOGD("Remove node %s type %s when fusion with variable %s", trans_node->GetName().c_str(), - trans_node->GetType().c_str(), var_node->GetName().c_str()); - if (RenewTransOpDesc(trans_node, true) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::IsolateNode(trans_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (GraphUtils::RemoveNodeWithoutRelink(graph, trans_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - - // Iterate delete_trans_nodes backward, eg a->b->c, delete_trans_nodes:{{b,c},{a,b}} - // we should delete {a,b} first , then b->c,then we can delete {b,c} - // if we delete {b,c} first, then a->c, then we can not get b when we delete {a,b} - for (auto iter = delete_trans_nodes.rbegin(); iter != delete_trans_nodes.rend(); ++iter) { - auto front_node = iter->first; - auto back_node = iter->second; - if (RenewTransOpDesc(front_node, false) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (front_node->GetOutDataNodes().size() > 1) { - GELOGD("The trans node %s type %s connecting with var-ref %s has more" - " than one output data nodes, unlink the edge between them", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (ByPassTransNode(front_node, back_node) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to bypass trans node %s to node %s", front_node->GetName().c_str(), - back_node->GetName().c_str()); - return INTERNAL_ERROR; - } - } else { - GELOGD("The trans node %s type %s connecting with %s has only" - " one output data nodes, isolate and remove it.", - front_node->GetName().c_str(), front_node->GetType().c_str(), back_node->GetName().c_str()); - if (GraphUtils::IsolateNode(front_node, {0}) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - if (GraphUtils::RemoveNodeWithoutRelink(graph, front_node) != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewTransOpDesc(ge::NodePtr &node, bool is_reverse) { - int tran_in_index = TransOpUtil::GetTransOpDataIndex(node->GetType()); - auto op_desc = node->GetOpDesc(); - GE_CHECK_NOTNULL(op_desc); - GeTensorDesc input_desc = op_desc->GetInputDesc(tran_in_index); - GeTensorDesc output_desc = op_desc->GetOutputDesc(kTransOpOutIndex); - GeTensorDesc renew_desc = is_reverse ? output_desc : input_desc; - bool format_changed = false; - bool shape_changed = false; - bool dtype_changed = false; - if (input_desc.GetFormat() != output_desc.GetFormat()) { - format_changed = true; - } - if (input_desc.GetDataType() != output_desc.GetDataType()) { - dtype_changed = true; - } - if (!ge::formats::IsShapeEqual(input_desc.GetShape(), output_desc.GetShape())) { - shape_changed = true; - } - auto cur_node = node; - while (TransOpUtil::IsTransOp(cur_node)) { - tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - auto next_node = is_reverse ? NodeUtils::GetInDataNodeByIndex(*cur_node, tran_in_index) : - cur_node->GetOutDataNodes().at(kTransOpOutIndex); - if (!TransOpUtil::IsTransOp(next_node)) { - break; - } - auto prev_desc = next_node->GetOpDesc(); - tran_in_index = TransOpUtil::GetTransOpDataIndex(next_node->GetType()); - auto mutable_output_desc = prev_desc->MutableOutputDesc(kTransOpOutIndex); - auto mutable_input_desc = prev_desc->MutableInputDesc(tran_in_index); - GE_CHECK_NOTNULL(prev_desc->MutableOutputDesc(kTransOpOutIndex)); - GE_CHECK_NOTNULL(prev_desc->MutableInputDesc(tran_in_index)); - if (shape_changed) { - mutable_input_desc->SetShape(renew_desc.GetShape()); - mutable_output_desc->SetShape(renew_desc.GetShape()); - } - if (dtype_changed) { - mutable_input_desc->SetDataType(renew_desc.GetDataType()); - mutable_output_desc->SetDataType(renew_desc.GetDataType()); - } - if (format_changed) { - mutable_input_desc->SetFormat(renew_desc.GetFormat()); - mutable_output_desc->SetFormat(renew_desc.GetFormat()); - } - cur_node = next_node; - } - return SUCCESS; -} - -Status VariableOpPass::CheckIfCouldBeOptimized(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag) { - bool is_match = true; - auto ret = GetSameTransOP(var, first_path_trans_order, trans_type_to_changed_desc, - trans_type_to_trans_ops, is_match); - - if (ret != SUCCESS) { - GELOGE(FAILED, "Get same trans op of variable node: %s failed", var->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - - if (!is_match) { - flag = false; - GELOGI("trans nodes after variable do not meet the condition"); - return SUCCESS; - } - - flag = true; - return SUCCESS; -} - -Status VariableOpPass::GetSameTransOP(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - GELOGD("Begin to get Node: %s trans op info of first path", var->GetName().c_str()); - auto ret = GetFisrtPathTransInfo(var, first_path_trans_order, - trans_type_to_changed_desc, trans_type_to_trans_ops); - if (ret != SUCCESS) { - GELOGE(FAILED, "Get var: %s first path trans info failed", var->GetName().c_str()); - return FAILED; - } - - if (first_path_trans_order.empty()) { - GELOGD("var %s first path has no trans op, not need to pass", var->GetName().c_str()); - is_match = false; - return SUCCESS; - } - - GELOGD("Begin to depth first search Node: %s ", var->GetName().c_str()); - VariableDFS(var, trans_type_to_changed_desc, trans_type_to_trans_ops, is_match); - - return SUCCESS; -} - -void VariableOpPass::VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match) { - std::stack node_stack; - std::stack> path_stack; - for (auto &out_node : node->GetOutDataNodes()) { - if (!is_match) { - break; - } - if (out_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(out_node)) { - is_match = false; - break; - } - node_stack.push(out_node); - path_stack.emplace(vector{out_node}); - while (!node_stack.empty() && is_match) { - auto cur_node = node_stack.top(); - auto cur_path = path_stack.top(); - node_stack.pop(); - path_stack.pop(); - if (cur_node->GetOutDataNodesSize() == 0 || !ge::TransOpUtil::IsTransOp(cur_node)) { - UpdateTransInfo(cur_path, is_match, trans_type_to_changed_desc, trans_type_to_trans_ops); - continue; - } - for (auto &next_node : cur_node->GetOutDataNodes()) { - node_stack.push(next_node); - auto next_path = cur_path; - next_path.push_back(next_node); - path_stack.emplace(next_path); - } - } - } -} - -Status VariableOpPass::UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - GELOGD("Begin to update trans info by path"); - std::set trans_op_occured; - for (auto &trans_node : cur_path) { - auto trans_node_type = trans_node->GetType(); - if (trans_op_occured.find(trans_node_type) != trans_op_occured.end() || - !ge::TransOpUtil::IsTransOp(trans_node_type)) { - continue; - } - trans_op_occured.insert(trans_node_type); - auto desc_diff = GetInAndOutDecsDiff(trans_node); - if (trans_type_to_changed_desc.find(trans_node_type) != trans_type_to_changed_desc.end() && - desc_diff == trans_type_to_changed_desc[trans_node_type].first) { - trans_type_to_changed_desc[trans_node_type].second = true; - auto iter = find(trans_type_to_trans_ops[trans_node_type].begin(), - trans_type_to_trans_ops[trans_node_type].end(), - trans_node); - if (iter == trans_type_to_trans_ops[trans_node_type].end()) { - trans_type_to_trans_ops[trans_node_type].push_back(trans_node); - } - } - } - std::set delete_trans_types; - for (auto &trans_item : trans_type_to_changed_desc) { - if (!trans_item.second.second) { - delete_trans_types.insert(trans_item.first); - } else { - trans_item.second.second = false; - } - } - for (auto& delete_item : delete_trans_types) { - trans_type_to_changed_desc.erase(delete_item); - } - if (trans_type_to_changed_desc.empty()) { - is_match = false; - } - return SUCCESS; -} - -Status VariableOpPass::GetFisrtPathTransInfo(const NodePtr &var, vector &first_path_trans_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops) { - auto cur_node = var; - while (cur_node->GetOutDataNodesSize() != 0) { - cur_node = cur_node->GetOutDataNodes().at(0); - GE_CHECK_NOTNULL(cur_node); - if (!ge::TransOpUtil::IsTransOp(cur_node)) { - break; - } - auto cur_node_type = cur_node->GetType(); - // only get the the first occurrence operator of same type - if (trans_type_to_changed_desc.find(cur_node_type) == trans_type_to_changed_desc.end()) { - auto desc_diff = GetInAndOutDecsDiff(cur_node); - trans_type_to_changed_desc[cur_node->GetType()] = make_pair(desc_diff, false); - trans_type_to_trans_ops[cur_node->GetType()] = vector{cur_node}; - first_path_trans_order.push_back(cur_node->GetType()); - } - } - GELOGD("get var %s first path trans info success", var->GetName().c_str()); - return SUCCESS; -} - -Status VariableOpPass::GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes) { - auto iterator = var_and_var_ref_map_.find(var_node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGD("there is no var_ref of node %s", var_node->GetName().c_str()); - return SUCCESS; - } - vector delete_trans_type; - for (auto &trans_type : trans_type_to_changed_desc) { - delete_trans_type.push_back(trans_type.first); - } - for (auto &ref_node : iterator->second) { - GE_CHECK_NOTNULL(ref_node); - auto cur_node = *ref_node->GetInDataNodes().begin(); - auto behind_node = ref_node; - GE_CHECK_NOTNULL(cur_node); - vector tmp_delete_trans_type = delete_trans_type; - while (TransOpUtil::IsTransOp(cur_node)) { - GE_CHECK_NOTNULL(cur_node); - auto iter = find(tmp_delete_trans_type.begin(), tmp_delete_trans_type.end(), cur_node->GetType()); - if (iter != tmp_delete_trans_type.end()) { - CheckTransOpOfVarAndVarRefSymmetry(cur_node, trans_type_to_changed_desc[cur_node->GetType()].first, - pass_check); - if (!pass_check) { - GELOGD("trans op : %s of var ref %s is illegal", cur_node->GetName().c_str(), ref_node->GetName().c_str()); - return SUCCESS; - } - tmp_delete_trans_type.erase(iter); - delete_var_ref_trans_nodes.emplace_back(std::make_pair(cur_node, behind_node)); - } - int tran_in_index = TransOpUtil::GetTransOpDataIndex(cur_node->GetType()); - behind_node = cur_node; - cur_node = cur_node->GetInDataNodes().at(tran_in_index); - } - if (!tmp_delete_trans_type.empty()) { - pass_check = false; - return SUCCESS; - } - } - return SUCCESS; -} - -Status VariableOpPass::CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, - bool &is_symmetry){ - auto var_ref_trans_op_desc_diff = GetInAndOutDecsDiff(var_ref_trans_op, true); - is_symmetry = (var_ref_trans_op_desc_diff == desc_diff); - return SUCCESS; -} - -Status VariableOpPass::UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node) { - if (node == nullptr || node->GetOpDesc() == nullptr) { - GELOGE(FAILED, "node or opdesc is nullptr"); - return FAILED; - } - const Format &format = final_output.GetFormat(); - const DataType &data_type = final_output.GetDataType(); - const GeShape &shape = final_output.GetShape(); - GELOGD("last ref is (%s, %s, %lu), var_ref_name is %s.", TypeUtils::DataTypeToSerialString(data_type).c_str(), - TypeUtils::FormatToSerialString(format).c_str(), shape.GetDims().size(), node->GetName().c_str()); - - auto node_desc = node->GetOpDesc()->GetOutputDesc(0); - CopyVariableFormatDataTypeAndShape(final_output, node_desc); - if (node->GetOpDesc()->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGE(FAILED, "update output desc fail."); - return FAILED; - } - GELOGD("node ref is (%s, %s, %lu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetDataType()).c_str(), - TypeUtils::FormatToSerialString(node->GetOpDesc()->GetOutputDesc(0).GetFormat()).c_str(), - node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims().size(), node->GetName().c_str()); - - auto iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - auto graph = node->GetOwnerComputeGraph(); - if (GenerateVariableVariableRefMap(graph) != SUCCESS) { - GELOGE(INTERNAL_ERROR, "Failed to generate variable map for graph %s", graph->GetName().c_str()); - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - iterator = var_and_var_ref_map_.find(node); - if (iterator == var_and_var_ref_map_.end()) { - GELOGW("The var node %s which belongs to graph %s can not be found on the graph", node->GetName().c_str(), - node->GetOwnerComputeGraph()->GetName().c_str()); - return SUCCESS; - } - - for (const auto &var_ref_node : iterator->second) { - auto var_ref_node_description = var_ref_node->GetOpDesc(); - GE_CHECK_NOTNULL(var_ref_node_description); - - GELOGD("var_ref_node before is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(data_type).c_str(), TypeUtils::FormatToSerialString(format).c_str(), - shape.GetDims().size(), var_ref_node->GetName().c_str()); - if (var_ref_node_description->UpdateOutputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateOutputDesc fail."); - } - if (var_ref_node_description->UpdateInputDesc(0, node_desc) != GRAPH_SUCCESS) { - GELOGW("UpdateInputDesc fail."); - } - const auto &input_desc = var_ref_node_description->MutableInputDesc(0); - const auto &output_desc = var_ref_node_description->MutableOutputDesc(0); - GE_CHECK_NOTNULL(input_desc); - GE_CHECK_NOTNULL(output_desc); - GELOGD("var_ref_node ref is (%s, %s, %zu), var_ref_name is %s.", - TypeUtils::DataTypeToSerialString(input_desc->GetDataType()).c_str(), - TypeUtils::FormatToSerialString(input_desc->GetFormat()).c_str(), output_desc->GetShape().GetDims().size(), - var_ref_node->GetName().c_str()); - } - - return SUCCESS; -} - -Status VariableOpPass::GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph) { - std::map names_to_var; - std::map> names_to_refs; - GE_CHECK_NOTNULL(compute_graph); - for (auto &node : compute_graph->GetDirectNode()) { - if (node->GetType() != VARIABLE) { - continue; - } - std::string ref_var_name; - if (!ge::AttrUtils::GetStr(node->GetOpDesc(), REF_VAR_SRC_VAR_NAME, ref_var_name)) { - names_to_var[node->GetName()] = node; - } else { - names_to_refs[ref_var_name].insert(node); - } - } - - for (auto &name_to_var : names_to_var) { - var_and_var_ref_map_[name_to_var.second] = names_to_refs[name_to_var.first]; - } - return SUCCESS; -} - -void VariableOpPass::CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, - GeTensorDesc &dst_tensor_desc) { - dst_tensor_desc.SetShape(src_tensor_desc.GetShape()); - dst_tensor_desc.SetFormat(src_tensor_desc.GetFormat()); - dst_tensor_desc.SetDataType(src_tensor_desc.GetDataType()); -} - -Status VariableOpPass::UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes) { - for (auto &need_set_node : nodes) { - auto ret = UpdateVarAndRefOutputFormatInfo(final_output, need_set_node); - if (ret != SUCCESS) { - return GE_GRAPH_VARIABLE_OP_PASS_FAILED; - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(ge::ComputeGraphPtr &graph) { - GE_CHECK_NOTNULL(graph); - // renew var manager desc - Status ret = SUCCESS; - for (auto &node : graph->GetDirectNode()) { - bool is_var_node = - (node->GetType() == VARIABLE) || (node->GetType() == VARIABLEV2) || (node->GetType() == VARHANDLEOP); - if (is_var_node) { - if (!ge::VarManager::Instance(graph->GetSessionID())->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - continue; - } - GELOGD("var manager exist var node[%s], graph name[%s]", node->GetName().c_str(), graph->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - ret = ge::VarManager::Instance(graph->GetSessionID())->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - } - } - return SUCCESS; -} - -Status VariableOpPass::RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road) { - // renew var desc if the trans_road is all reshape or reformat - for (auto &road : fusion_road) { - if (road.node_type != RESHAPE && road.node_type != REFORMAT) { - return SUCCESS; - } - } - - if (!ge::VarManager::Instance(session_id)->IsVarExist(node->GetName())) { - GELOGD("var manager does not exist var node[%s]", node->GetName().c_str()); - return SUCCESS; - } - GELOGD("var manager exist var node[%s]", node->GetName().c_str()); - GE_CHECK_NOTNULL(node->GetOpDesc()); - Status ret = ge::VarManager::Instance(session_id)->RenewCurVarDesc(node->GetName(), node->GetOpDesc()); - if (ret != SUCCESS) { - GELOGE(FAILED, "var manager renew var[%s] descriptor failed!", node->GetName().c_str()); - return FAILED; - } - - return SUCCESS; -} - -} // namespace ge diff --git a/ge/graph/passes/variable_op_pass_bak.h b/ge/graph/passes/variable_op_pass_bak.h deleted file mode 100644 index fccd063b..00000000 --- a/ge/graph/passes/variable_op_pass_bak.h +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#define GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ -#include -#include -#include -#include "graph/common/transop_util.h" -#include "common/formats/utils/formats_trans_utils.h" -#include "graph/utils/node_utils.h" -#include "graph/graph.h" -#include "graph/manager/graph_var_manager.h" -#include "graph/manager/util/variable_accelerate_ctrl.h" -#include "inc/graph_pass.h" - -namespace ge { -namespace variable_op { -struct NodeDesc { - ge::GeTensorDesc input; - ge::GeTensorDesc output; - bool is_update = false; -}; -} // namespace variable_op -class VariableOpPass : public GraphPass { - public: - explicit VariableOpPass(VarAccelerateCtrl *ctrl) : var_accelerate_ctrl_(ctrl) {} - - ~VariableOpPass() override = default; - - Status Run(ge::ComputeGraphPtr graph) override; - - private: - Status UpdateTransRoad(VarTransRoad &fusion_road, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status DealFusion(const ge::NodePtr &var_node, VarTransRoad &fusion_road, - map> trans_type_to_changed_desc, - map> trans_type_to_trans_ops, - vector> &delete_trans_nodes); - - Status RenewTransOpDesc(ge::NodePtr &node, bool is_reverse); - - Status RenewTransRoadDesc(const NodePtr &var, VarTransRoad &fusion_road); - - Status CheckIfCouldBeOptimized(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &flag); - - Status FusionIfNeed(const NodePtr &var, VarTransRoad &fusion_road); - - Status GetSameTransOP(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status GetFisrtPathTransInfo(const NodePtr &var, vector &trans_road_order, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - void VariableDFS(const NodePtr &node, map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops, bool &is_match); - - Status UpdateTransInfo(vector &cur_path, bool& is_match, - map> &trans_type_to_changed_desc, - map> &trans_type_to_trans_ops); - - Status GetAndCheckTransOpOfVarRef(const ge::NodePtr &var_node, bool &pass_check, - map> &trans_type_to_changed_desc, - vector> &delete_var_ref_trans_nodes); - - Status CheckTransOpOfVarAndVarRefSymmetry(NodePtr &var_ref_trans_op, const string &desc_diff, bool &is_symmetry); - - Status UpdateVarAndRefOutputFormatInfo(const GeTensorDesc &final_output, const ge::NodePtr &node); - - Status GenerateVariableVariableRefMap(const ComputeGraphPtr &compute_graph); - - void CopyVariableFormatDataTypeAndShape(const GeTensorDesc &src_tensor_desc, GeTensorDesc &dst_tensor_desc); - - Status UpdateIOFormatInfo(const GeTensorDesc &final_output, std::set &nodes); - - Status RenewVarDesc(ge::ComputeGraphPtr &graph); - - Status RenewVarDesc(uint64_t session_id, const NodePtr &node, const VarTransRoad &fusion_road); - - map> var_and_var_ref_map_; - - VarAccelerateCtrl *var_accelerate_ctrl_; -}; -} // namespace ge -#endif // GE_GRAPH_PASSES_VARIABLE_OP_PASS_H_ diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc index c6179095..63f1b131 100644 --- a/ge/graph/preprocess/graph_preprocess.cc +++ b/ge/graph/preprocess/graph_preprocess.cc @@ -1925,7 +1925,7 @@ void GraphPrepare::TypeConversionOfConstant() { for (ge::NodePtr &n : compute_graph_->GetAllNodes()) { // This can ensure that n is not a null pointer // No Conversion when called by aclOpCompile - (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_DYNAMIC_SHAPE_SINGLE_AICPU, is_acl_compile); + (void)AttrUtils::GetBool(n->GetOpDesc(), ATTR_SINGLE_OP_SCENE, is_acl_compile); if (is_acl_compile) { return; } diff --git a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc index 7b0ffc02..b1534eb4 100755 --- a/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc @@ -540,7 +540,7 @@ Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map aipp_params(new (std::nothrow) domi::AippOpParams()); ge::GeAttrValue::NAMED_ATTRS aipp_attr; - GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, + GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(data_op, ATTR_NAME_AIPP, aipp_attr), ACL_ERROR_GE_AIPP_NOT_EXIST, "Data node do not contain param aipp!"); GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, aipp_params.get()), "get aipp params failed"); diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc index b7c6c33d..67c85460 100644 --- a/ge/hybrid/executor/hybrid_model_async_executor.cc +++ b/ge/hybrid/executor/hybrid_model_async_executor.cc @@ -221,7 +221,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy auto &tensor_desc = input_tensor_desc_[input_index]; tensor_desc->SetShape(GeShape(current_data.shapes[input_index])); args.input_desc[input_index] = tensor_desc; - GELOGD("Update shape of input[%u] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); + GELOGD("Update shape of input[%zu] to [%s]", input_index, tensor_desc->MutableShape().ToString().c_str()); GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorMemorySizeInBytes(*tensor_desc, tensor_size), "Failed to calc tensor size, index = %zu, shape = [%s]", input_index, @@ -238,7 +238,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy GE_CHECK_NOTNULL(tensor_buffer); args.inputs.emplace_back(std::shared_ptr(tensor_buffer.release())); - GELOGD("To copy input data for input[%u]", input_index); + GELOGD("To copy input data for input[%zu]", input_index); const DataBuffer &data_buf = blobs[input_index]; auto mem_size = static_cast(tensor_size); GE_CHK_BOOL_RET_STATUS(mem_size >= data_buf.length, @@ -247,7 +247,7 @@ Status HybridModelAsyncExecutor::PrepareInputs(const InputData ¤t_data, Hy data_buf.length, mem_size); - GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%u] datasize[%lu]", + GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%zu] memaddr[%p] mem_size[%zu] datasize[%lu]", model_->root_runtime_param_.graph_id, input_index, args.inputs[input_index].GetData(), diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 44f7d87f..a6386b27 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -174,6 +174,38 @@ Status NodeDoneCallback::GetGraphDescInfo(const NodePtr node, const HybridModel compute_graph_info = context_->GetProfilingGraphDescInfo(); context_->ClearProfilingGraphDescInfo(); + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + for (auto &tmp_compute_graph_info : compute_graph_info) { + // default + if (op_desc->GetAllInputsSize() == 0) { + tmp_compute_graph_info.input_format = { FORMAT_NULL }; + tmp_compute_graph_info.input_shape = { {0} }; + tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; + } + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); + if (input_desc == nullptr) { + continue; + } + tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); + tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); + tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); + } + + if (op_desc->GetOutputsSize() == 0) { + tmp_compute_graph_info.output_format = { FORMAT_NULL }; + tmp_compute_graph_info.output_shape = { {0} }; + tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; + } + for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { + GeTensorDesc output_desc = op_desc->GetOutputDesc(j); + tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); + tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); + tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); + } + } + return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc index 861cd30a..b314c6a7 100755 --- a/ge/hybrid/model/hybrid_model_builder.cc +++ b/ge/hybrid/model/hybrid_model_builder.cc @@ -939,7 +939,7 @@ Status HybridModelBuilder::InitVariableTensors() { GELOGE(MEMALLOC_FAILED, "Malloc host memory for an existed GeTensor failed."); return MEMALLOC_FAILED; } - GELOGD("Host variable [%s] malloc success, size=%lld.", it.first.c_str(), tensor_size); + GELOGD("Host variable [%s] malloc success, size=%ld.", it.first.c_str(), tensor_size); std::unique_ptr tensor(new (std::nothrow) TensorValue(mem_info.host_aligned_ptr->MutableGet(), tensor_size)); @@ -1608,16 +1608,19 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons GE_CHECK_NOTNULL(compute_graph); NodePtr node_ptr = nullptr; - vector task_def_list; + map> node_task_map; // create fp node bool is_insert_fp_profiling_task = false; (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_FP_PROFILILNG_TASK, is_insert_fp_profiling_task); if (is_insert_fp_profiling_task) { + vector task_def_list; (void)GenerateFpProfilingTask(op_desc, task_def_list); auto fp_desc = MakeShared(kProfilingFpNode, PROFILINGTRAININGTRACE); GE_CHECK_NOTNULL(fp_desc); fp_desc->SetOpKernelLibName(kEngineNameRts); node_ptr = compute_graph->AddNode(fp_desc); + GE_CHECK_NOTNULL(node_ptr); + node_task_map[node_ptr] = task_def_list; GELOGD("Create fp profiling node success before."); } // creat all reduce start node @@ -1625,6 +1628,7 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); if (is_all_reduce && is_insert_bp_profiling_task) { + vector task_def_list; int64_t log_id = 0; (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); GELOGD("All reduce node profiling task log id: %ld before", log_id); @@ -1634,18 +1638,24 @@ Status HybridModelBuilder::CreateProfilingNodeBefore(GraphItem &graph_item, cons GE_CHECK_NOTNULL(ar_desc_start); ar_desc_start->SetOpKernelLibName(kEngineNameRts); node_ptr = compute_graph->AddNode(ar_desc_start); + GE_CHECK_NOTNULL(node_ptr); + node_task_map[node_ptr] = task_def_list; GELOGD("Create all reduce start profiling node success before."); } - if (node_ptr != nullptr) { - for (const auto &task_def : task_def_list) { - hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); + if (!node_task_map.empty()) { + for (const auto &node_task : node_task_map) { + NodePtr profiling_node = node_task.first; + vector task_def_lists = node_task.second; + for (const auto &task_def : task_def_lists) { + hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); + } + NodeItem *node_item = nullptr; + GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item)); + node_item->input_start = 0; + node_item->output_start = 0; + graph_item.node_items_.emplace_back(node_item); } - NodeItem *node_item = nullptr; - GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); - node_item->input_start = 0; - node_item->output_start = 0; - graph_item.node_items_.emplace_back(node_item); } else { GELOGD("No need to create profiling node before."); } @@ -1661,12 +1671,13 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const GE_CHECK_NOTNULL(compute_graph); NodePtr node_ptr = nullptr; - vector task_def_list; + map> node_task_map; // Create all reduce end node bool is_insert_bp_profiling_task = false; (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_BP_PROFILILNG_TASK, is_insert_bp_profiling_task); bool is_all_reduce = (op_desc->GetType() == HCOMALLREDUCE || op_desc->GetType() == HVDCALLBACKALLREDUCE); if (is_all_reduce && is_insert_bp_profiling_task) { + vector task_def_list; int64_t log_id = 0; (void)ge::AttrUtils::GetInt(op_desc, ATTR_NAME_INSERT_PROFILILNG_TASK_LOG_ID, log_id); GELOGD("All reduce node profiling task log id: %ld after", log_id); @@ -1676,38 +1687,50 @@ Status HybridModelBuilder::CreateProfilingNodeAfter(GraphItem &graph_item, const GE_CHECK_NOTNULL(ar_desc_end); ar_desc_end->SetOpKernelLibName(kEngineNameRts); node_ptr = compute_graph->AddNode(ar_desc_end); + GE_CHECK_NOTNULL(node_ptr); + node_task_map[node_ptr] = task_def_list; GELOGD("Create all reduce end profiling node success after."); } // create bp node if (!is_all_reduce && is_insert_bp_profiling_task) { + vector task_def_list; (void) GenerateBpProfilingTask(op_desc, task_def_list); auto bp_op_desc = MakeShared(kProfilingBpNode, PROFILINGTRAININGTRACE); GE_CHECK_NOTNULL(bp_op_desc); bp_op_desc->SetOpKernelLibName(kEngineNameRts); node_ptr = compute_graph->AddNode(bp_op_desc); + GE_CHECK_NOTNULL(node_ptr); + node_task_map[node_ptr] = task_def_list; GELOGD("Create bp profiling node success after."); } // create end node bool is_insert_end_profiling_task = false; (void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_INSERT_END_PROFILILNG_TASK, is_insert_end_profiling_task); if (is_insert_end_profiling_task) { + vector task_def_list; (void)GenerateEndProfilingTask(op_desc, task_def_list); auto end_desc = MakeShared(kProfilingEndNode, PROFILINGTRAININGTRACE); GE_CHECK_NOTNULL(end_desc); end_desc->SetOpKernelLibName(kEngineNameRts); node_ptr = compute_graph->AddNode(end_desc); + GE_CHECK_NOTNULL(node_ptr); + node_task_map[node_ptr] = task_def_list; GELOGD("Create end profiling node success after."); } - if (node_ptr != nullptr) { - for (const auto &task_def : task_def_list) { - hybrid_model_.task_defs_[node_ptr].emplace_back(task_def); + if (!node_task_map.empty()) { + for (const auto &node_task : node_task_map) { + NodePtr profiling_node = node_task.first; + vector task_def_lists = node_task.second; + for (const auto &task_def : task_def_lists) { + hybrid_model_.task_defs_[profiling_node].emplace_back(task_def); + } + NodeItem *node_item = nullptr; + GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(profiling_node, &node_item)); + node_item->input_start = 0; + node_item->output_start = 0; + graph_item.node_items_.emplace_back(node_item); } - NodeItem *node_item = nullptr; - GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(node_ptr, &node_item)); - node_item->input_start = 0; - node_item->output_start = 0; - graph_item.node_items_.emplace_back(node_item); } else { GELOGD("No need to create profiling node after."); } diff --git a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc index b8acbf0e..e9c7c604 100644 --- a/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc @@ -29,8 +29,9 @@ constexpr int64_t kDimEndFlag = INT64_MIN; Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GELOGI("Node[%s] parse ext info start.", node_name_.c_str()); if (ext_info.empty()) { - GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str()); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", + node_name_.c_str()); + return ACL_ERROR_GE_PARAM_INVALID; } ext_info_len_ = ext_info.size(); @@ -38,8 +39,8 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { GE_CHECK_NOTNULL(ext_info_); if (memcpy_s(ext_info_.get(), ext_info_len_, ext_info.c_str(), ext_info.size()) != EOK) { - GELOGE(FAILED, "[%s] Failed to coy ext info", node_name_.c_str()); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to coy ext info", node_name_.c_str()); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } input_shape_and_type_.clear(); @@ -72,7 +73,7 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { offset += aicpu_ext_info->infoLen; } - GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(offset == ext_info_len_, ACL_ERROR_GE_PARAM_INVALID, "Node[%s] ext_info format error, parse not reach end, offset=%zu, ext_info_len=%zu.", node_name_.c_str(), offset, ext_info_len_); GELOGI("Node[%s] parse ext info end.", node_name_.c_str()); @@ -80,13 +81,13 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { } Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { - GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(int32_t), ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext shape type failed as infoLen must be %zu but %u.", node_name_.c_str(), sizeof(int32_t), aicpu_ext_info->infoLen); auto type = reinterpret_cast(aicpu_ext_info->infoMsg); - GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(*type == unknown_type_, ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext shape type failed as need %d but %d.", node_name_.c_str(), unknown_type_, *type); GELOGI("Node[%s] parse ext shape type success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen); @@ -95,7 +96,7 @@ Status AicpuExtInfoHandler::ParseExtShapeType(AicpuExtInfo *aicpu_ext_info) { Status AicpuExtInfoHandler::ParseExtInputShape(AicpuExtInfo *aicpu_ext_info) { auto need_len = input_num_ * sizeof(AicpuShapeAndType); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext input shape failed as infoLen must be " "input_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", node_name_.c_str(), input_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); @@ -116,7 +117,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { return SUCCESS; } auto need_len = output_num_ * sizeof(AicpuShapeAndType); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == need_len, ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext output shape failed as infoLen must be " "output_num[%u]*sizeof(ShapeAndType)[%zu] but %u.", node_name_.c_str(), output_num_, sizeof(AicpuShapeAndType), aicpu_ext_info->infoLen); @@ -130,7 +131,7 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { } Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) { - GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), ACL_ERROR_GE_PARAM_INVALID, "Node[%s] parse ext session info failed as infoLen must be %zu but %u.", node_name_.c_str(), sizeof(SessionInfo), aicpu_ext_info->infoLen); @@ -173,7 +174,7 @@ Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const } Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc) { - GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), INTERNAL_ERROR, + GE_CHK_BOOL_RET_STATUS((unknown_type_ != DEPEND_COMPUTE), ACL_ERROR_GE_INTERNAL_ERROR, "Node[%s] is depend compute is no need update output shape and type by ext.", node_name_.c_str()); GE_CHECK_LE(output_index, output_num_); @@ -183,7 +184,7 @@ Status AicpuExtInfoHandler::UpdateOutputShapeAndType(uint32_t output_index, cons if (unknown_type_ == DEPEND_SHAPE_RANGE) { std::vector> range; auto range_ret = output_desc.GetShapeRange(range); - GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, INTERNAL_ERROR, + GE_CHK_BOOL_RET_STATUS(range_ret == GRAPH_SUCCESS, ACL_ERROR_GE_INTERNAL_ERROR, "Node[%s] is shape range type but get GetShapeRange failed, ret=%u.", node_name_.c_str(), range_ret); for (size_t k = 0; k < range.size(); ++k) { @@ -210,9 +211,9 @@ Status AicpuExtInfoHandler::UpdateShapeAndType(const GeShape &shape, DataType da AicpuShapeAndType *shape_and_type) { auto dim_num = shape.GetDimNum(); if (dim_num > aicpu::FWKAdapter::kMaxShapeDims) { - GELOGE(PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Update shape and type failed, as dim_num %zu is over max shape dims %u.", dim_num, aicpu::FWKAdapter::kMaxShapeDims); - return PARAM_INVALID; + return ACL_ERROR_GE_PARAM_INVALID; } size_t index = 0; for (; index < dim_num; ++index) { diff --git a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc index 2bca3e06..0837ffff 100755 --- a/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc +++ b/ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc @@ -126,6 +126,12 @@ Status KnownNodeTask::Init(TaskContext &context) { auto dump_properties = context.GetDumpProperties(); if (dump_properties.IsDumpOpen()) { davinci_model_->SetDumpProperties(dump_properties); + void *global_step = nullptr; + TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP); + if (varible_global_step != nullptr) { + global_step = varible_global_step->MutableData(); + } + davinci_model_->SetKnownShapeGlobalStep(global_step); } int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 12e98160..e74256f2 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -117,11 +117,11 @@ Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executo auto executor_type = ResolveExecutorType(node); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", executor_type); + GELOGE(INTERNAL_ERROR, "Failed to get executor by type: %d.", static_cast(executor_type)); return INTERNAL_ERROR; } - GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), executor_type); + GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast(executor_type)); *executor = it->second.get(); return SUCCESS; } @@ -165,7 +165,7 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { TensorUtils::SetSize(output_tensor, output_mem_size); GE_CHK_STATUS_RET(op_desc->UpdateOutputDesc(static_cast(i), output_tensor), "hccl update output size failed."); - GELOGD("%s output desc[%u], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, + GELOGD("%s output desc[%zu], dim_size: %zu, mem_size: %ld.", node.GetName().c_str(), i, output_tensor.GetShape().GetDimNum(), output_mem_size); } return SUCCESS; @@ -189,14 +189,14 @@ Status NodeExecutorManager::InitializeExecutors() { GE_CHECK_NOTNULL(build_fn); auto executor = std::unique_ptr(build_fn()); if (executor == nullptr) { - GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", engine_type); + GELOGE(INTERNAL_ERROR, "Failed to create executor for engine type = %d", static_cast(engine_type)); return INTERNAL_ERROR; } - GELOGD("Executor of engine type = %d was created successfully", engine_type); + GELOGD("Executor of engine type = %d was created successfully", static_cast(engine_type)); auto ret = executor->Initialize(); if (ret != SUCCESS) { - GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", engine_type); + GELOGE(ret, "Failed to initialize NodeExecutor of type = %d, clear executors", static_cast(engine_type)); for (auto &executor_it : executors_) { executor_it.second->Finalize(); } diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index de583ef1..bc318124 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -554,33 +554,6 @@ Status TaskContext::SaveProfilingGraphDescInfo(uint32_t task_id, uint32_t stream tmp_compute_graph_info.model_name = dynamic_model_name; tmp_compute_graph_info.op_name = op_desc->GetName(); tmp_compute_graph_info.op_type = op_desc->GetType(); - // default - if (op_desc->GetAllInputsSize() == 0) { - tmp_compute_graph_info.input_format = { FORMAT_NULL }; - tmp_compute_graph_info.input_shape = { {0} }; - tmp_compute_graph_info.input_data_type = { DT_UNDEFINED }; - } - for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { - GeTensorDescPtr input_desc = op_desc->MutableInputDesc(i); - if (input_desc == nullptr) { - continue; - } - tmp_compute_graph_info.input_format.emplace_back(input_desc->GetFormat()); - tmp_compute_graph_info.input_shape.emplace_back(input_desc->GetShape().GetDims()); - tmp_compute_graph_info.input_data_type.emplace_back(input_desc->GetDataType()); - } - - if (op_desc->GetOutputsSize() == 0) { - tmp_compute_graph_info.output_format = { FORMAT_NULL }; - tmp_compute_graph_info.output_shape = { {0} }; - tmp_compute_graph_info.output_data_type = { DT_UNDEFINED }; - } - for (size_t j = 0; j < op_desc->GetOutputsSize(); ++j) { - GeTensorDesc output_desc = op_desc->GetOutputDesc(j); - tmp_compute_graph_info.output_format.emplace_back(output_desc.GetFormat()); - tmp_compute_graph_info.output_shape.emplace_back(output_desc.GetShape().GetDims()); - tmp_compute_graph_info.output_data_type.emplace_back(output_desc.GetDataType()); - } tmp_compute_graph_info.task_id = task_id; tmp_compute_graph_info.stream_id = stream_id; compute_graph_info.emplace_back(tmp_compute_graph_info); diff --git a/ge/session/omg.cc b/ge/session/omg.cc index 6a715822..47073fc0 100755 --- a/ge/session/omg.cc +++ b/ge/session/omg.cc @@ -1007,7 +1007,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOm(const char *model_file, const char *js } else { ErrorManager::GetInstance().ATCReportErrMessage("E10003", {"parameter", "value", "reason"}, {"om", model_file, "invalid om file"}); - GELOGE(PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "ParseModelContent failed because of invalid om file. Please check --om param."); } if (model.model_data != nullptr) { diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index 2fa7182b..4f32bd6b 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -57,9 +57,10 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { std::vector task_desc_info; uint32_t task_id = 0; uint32_t stream_id = 0; - if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) { - GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Get task_id and stream_id failed."); - return ACL_ERROR_GE_PARAM_INVALID; + auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); + if (rt_ret != RT_ERROR_NONE) { + GELOGE(rt_ret, "Get task_id and stream_id failed."); + return RT_ERROR_TO_GE_STATUS(rt_ret); } TaskDescInfo tmp_task_desc_info; diff --git a/ge/single_op/single_op_manager.cc b/ge/single_op/single_op_manager.cc index d523d355..ccbdbe3f 100644 --- a/ge/single_op/single_op_manager.cc +++ b/ge/single_op/single_op_manager.cc @@ -141,7 +141,7 @@ Status SingleOpManager::GetResourceId(rtStream_t stream, uintptr_t &resource_id) auto rt_err = rtCtxGetCurrent(&rt_cur_ctx); if (rt_err != RT_ERROR_NONE) { GELOGE(rt_err, "get current context failed, runtime result is %d", static_cast(rt_err)); - return rt_err; + return RT_ERROR_TO_GE_STATUS(rt_err); } // use current context as resource key instead GELOGI("use context as resource key instead when default stream"); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 220adde8..7d092091 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -438,8 +438,8 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { auto task_type = static_cast(task_def.type()); if (task_type == RT_MODEL_TASK_KERNEL) { if (single_op.op_task_ != nullptr) { - GELOGE(UNSUPPORTED, "Do not support dynamic op with multiple tasks."); - return UNSUPPORTED; + GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Do not support dynamic op with multiple tasks."); + return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 90ddc696..a01ee0f0 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -30,8 +30,8 @@ namespace ge { auto sec_ret = memcpy_s(&fwk_op_kernel, sizeof(STR_FWK_OP_KERNEL), kernel_def_.args().data(), kernel_def_.args().size()); if (sec_ret != EOK) { - GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "memcpy failed, ret: %d", sec_ret); - return ACL_ERROR_GE_INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } auto io_addr_val = static_cast(reinterpret_cast(io_addr)); @@ -46,7 +46,7 @@ namespace ge { auto rt_ret = rtMalloc(&fwk_op_args, sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(rt_ret, "malloc arg memory failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(fwk_op_args, sizeof(STR_FWK_OP_KERNEL), &fwk_op_kernel, @@ -54,7 +54,7 @@ namespace ge { if (rt_ret != RT_ERROR_NONE) { (void)rtFree(fwk_op_args); GELOGE(rt_ret, "copy args failed, ret = %d", rt_ret); - return rt_ret; + return RT_ERROR_TO_GE_STATUS(rt_ret); } *args = fwk_op_args; return SUCCESS; @@ -96,7 +96,7 @@ namespace ge { // get kernel_ext_info auto &kernel_ext_info = kernel_def_.kernel_ext_info(); auto kernel_ext_info_size = kernel_def_.kernel_ext_info_size(); - GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, FAILED, + GE_CHK_BOOL_RET_STATUS(kernel_ext_info.size() == kernel_ext_info_size, ACL_ERROR_GE_PARAM_INVALID, "task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", kernel_ext_info.size(), kernel_ext_info_size); GE_CHK_STATUS_RET(task.SetExtInfoAndType(kernel_ext_info, kernel_id), "Init ext info failed."); diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 82303894..3d001d8b 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -45,7 +45,7 @@ void FreeHbm(void *var) { Status OpTask::OpenDump(rtStream_t stream) { if (DumpManager::GetInstance().GetDumpProperties().IsSingleOpNeedDump()) { - GELOGI("Dump is open in single op,start to set dump info"); + GELOGI("Dump is open in single op, start to set dump info"); std::vector input_addrs; std::vector output_adds; auto input_size = op_desc_->GetInputsSize(); @@ -54,10 +54,10 @@ Status OpTask::OpenDump(rtStream_t stream) { size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < input_size + output_size) { - GELOGE(FAILED, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "io_addrs_for_dump_ size %zu is not equal input and output size %zu", arg_num, input_size + output_size); - return FAILED; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (size_t i = 0; i < input_size; i++) { @@ -120,11 +120,11 @@ Status OpTask::DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_works size_t arg_num = 0; GetIoAddr(arg_base, arg_num); if (arg_num < all_addresses.size()) { - GELOGE(INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[%s] arg number mismatches, expect at least = %zu, but got = %zu", op_desc_->GetName().c_str(), all_addresses.size(), arg_num); - return INTERNAL_ERROR; + return ACL_ERROR_GE_INTERNAL_ERROR; } for (void *addr : all_addresses) { @@ -178,8 +178,8 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { } if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->stub_name_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %s", this->stub_name_.c_str()); auto status = OpenDump(stream); @@ -199,8 +199,8 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve run_info.block_dim = 0; auto ret = optiling::OpParaCalculate(*node_, run_info); if (ret != GRAPH_SUCCESS) { - GELOGE(FAILED, "Failed to invoke OpParaCalculate. ret = %u", ret); - return FAILED; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to invoke OpParaCalculate. ret = %u", ret); + return ACL_ERROR_GE_INTERNAL_ERROR; } block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); @@ -223,8 +223,8 @@ Status TbeOpTask::UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc } else { std::vector storage_shape; if (!AttrUtils::GetListInt(src_tensor, ge::ATTR_NAME_STORAGE_SHAPE, storage_shape)) { - GELOGE(PARAM_INVALID, "Failed to get storage_shape while storage_format was set"); - return PARAM_INVALID; + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Failed to get storage_shape while storage_format was set"); + return ACL_ERROR_GE_INTERNAL_ERROR; } GELOGD("Storage format set. update shape to [%s], and original shape to [%s]", @@ -273,7 +273,9 @@ Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { std::vector ws_offsets; for (auto ws_size : workspace_sizes) { // alignment and padding should be done in OpParaCalculate - GE_CHK_STATUS_RET_NOLOG(CheckInt64AddOverflow(total_size, ws_size)); + if (CheckInt64AddOverflow(total_size, ws_size) != SUCCESS) { + return ACL_ERROR_GE_INTERNAL_ERROR; + } ws_offsets.emplace_back(total_size); total_size += ws_size; } @@ -321,8 +323,9 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { - GELOGE(INTERNAL_ERROR, "[%s] Failed to update kernel args.", node_->GetName().c_str()); - return INTERNAL_ERROR; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[%s] Failed to update kernel args.", + node_->GetName().c_str()); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); @@ -360,7 +363,7 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint num_inputs_, num_outputs_, unknown_type_)); - GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, FAILED, "Malloc aicpu_ext_handle mem failed!"); + GE_CHK_BOOL_RET_STATUS(aicpu_ext_handle_ != nullptr, ACL_ERROR_GE_MEMORY_ALLOCATION, "Malloc aicpu_ext_handle mem failed!"); Status ret = aicpu_ext_handle_->Parse(kernel_ext_info); if (ret != SUCCESS) { @@ -418,7 +421,7 @@ Status AiCpuBaseTask::UpdateExtInfo(const std::vector &input_desc, "Input[%zu] update input shape failed.", input_index); continue; } - GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < input_desc.size(), ACL_ERROR_GE_PARAM_INVALID, "Input_desc size is %zu, but get non_const_index is %zu", input_desc.size(), non_const_index); GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateInputShapeAndType(input_index, input_desc[non_const_index]), @@ -511,7 +514,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto arg_base++; continue; } - GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), PARAM_INVALID, + GE_CHK_BOOL_RET_STATUS(non_const_index < inputs.size(), ACL_ERROR_GE_PARAM_INVALID, "Input size is %zu, but get non_const_index is %zu", inputs.size(), non_const_index); auto addr = inputs[non_const_index].data; @@ -561,15 +564,15 @@ Status AiCpuTask::LaunchKernel(rtStream_t stream) { RT_MEMCPY_HOST_TO_DEVICE_EX, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "rtMemcpyAsync workspace data failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("To invoke rtKernelLaunchEx. task = %s", this->op_type_.c_str()); ret = rtKernelLaunchEx(args_, arg_size_, 0, stream); if (ret != RT_ERROR_NONE) { - GELOGE(RT_FAILED, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); - return RT_FAILED; + GELOGE(ret, "Invoke rtKernelLaunch failed. ret = %d, task = %s", ret, this->op_type_.c_str()); + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); @@ -747,9 +750,9 @@ Status AiCpuTask::InitForSummaryAndCopy() { Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { if (kernel_def.args_size() > sizeof(STR_FWK_OP_KERNEL)) { - GELOGE(PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", + GELOGE(ACL_ERROR_GE_PARAM_INVALID, "sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def.args_size()); - return PARAM_INVALID; + return ACL_ERROR_GE_PARAM_INVALID; } GE_CHK_RT_RET(rtMalloc(©_workspace_buf_, kernel_def.task_info_size(), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMemcpy(copy_workspace_buf_, kernel_def.task_info_size(), @@ -759,8 +762,8 @@ Status AiCpuTask::SetMemCopyTask(const domi::KernelExDef &kernel_def) { auto sec_ret = memcpy_s(&aicpu_task, sizeof(STR_FWK_OP_KERNEL), kernel_def.args().data(), kernel_def.args().size()); if (sec_ret != EOK) { - GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); - return FAILED; + GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "memcpy failed, ret: %d", sec_ret); + return ACL_ERROR_GE_MEMORY_OPERATE_FAILED; } aicpu_task.fwkKernelBase.fwk_kernel.inputOutputAddr = reinterpret_cast(copy_ioaddr_dev_); @@ -844,7 +847,7 @@ Status AiCpuCCTask::LaunchKernel(rtStream_t stream) { sm_desc, stream, dump_flag_); if (ret != RT_ERROR_NONE) { GELOGE(ret, "Invoke rtCpuKernelLaunch failed. ret = %d", ret); - return ret; + return RT_ERROR_TO_GE_STATUS(ret); } GELOGI("[TASK_INFO] %lu/%s", kernel_id_, op_type_.c_str()); GELOGD("Invoke rtCpuKernelLaunch succeeded"); diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 9ba30b8e..6eee61d0 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -242,7 +242,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & auto rtRet = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy args failed, size = %zu, ret = %d", arg_size, static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } const domi::KernelContext &context = kernel_def_.context(); @@ -261,7 +261,7 @@ Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam & rtRet = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); if (rtRet != RT_ERROR_NONE) { GELOGE(rtRet, "rtMemcpy addresses failed, ret = %d", static_cast(rtRet)); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } } @@ -287,7 +287,7 @@ Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶ auto rtRet = rtGetFunctionByName(stub_name_.c_str(), &stub_func); if (rtRet != SUCCESS) { GELOGE(rtRet, "rtGetFunctionByName failed."); - return rtRet; + return RT_ERROR_TO_GE_STATUS(rtRet); } task.SetStubFunc(stub_name_, stub_func); diff --git a/inc/external/ge/ge_api_error_codes.h b/inc/external/ge/ge_api_error_codes.h index 3d63aced..e77f817c 100644 --- a/inc/external/ge/ge_api_error_codes.h +++ b/inc/external/ge/ge_api_error_codes.h @@ -109,8 +109,13 @@ GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_NOT_EXIST, "AIPP parameter not exist."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_AIPP_MODE_INVALID, "AIPP mode invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "Task type invalid."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, "Kernel type invalid."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_PLGMGR_PATH_INVALID, "Plugin path is invalid."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID, "Format is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID, "Shape is invalid when transferring shape."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID, "Datatype is invalid when transferring shape."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_ALLOCATION, "Memory allocation error."); +GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate memory."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_INTERNAL_ERROR, "Internal error."); GE_ERRORNO_EXTERNAL(ACL_ERROR_GE_LOAD_MODEL, "Load model error."); diff --git a/inc/external/ge/ge_error_codes.h b/inc/external/ge/ge_error_codes.h index 20a7e0f9..041fc7ae 100644 --- a/inc/external/ge/ge_error_codes.h +++ b/inc/external/ge/ge_error_codes.h @@ -38,7 +38,12 @@ static const uint32_t ACL_ERROR_GE_AIPP_NOT_EXIST = 145015; static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; +static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; +static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; static const uint32_t ACL_ERROR_GE_LOAD_MODEL = 545001; static const uint32_t ACL_ERROR_GE_EXEC_LOAD_MODEL_PARTITION_FAILED = 545002; @@ -49,6 +54,7 @@ static const uint32_t ACL_ERROR_GE_EXEC_RELEASE_MODEL_DATA = 545006; static const uint32_t ACL_ERROR_GE_COMMAND_HANDLE = 545007; static const uint32_t ACL_ERROR_GE_GET_TENSOR_INFO = 545008; static const uint32_t ACL_ERROR_GE_UNLOAD_MODEL = 545009; + #ifdef __cplusplus } // namespace ge #endif diff --git a/inc/framework/common/debug/ge_log.h b/inc/framework/common/debug/ge_log.h index 4a32af36..07cd1664 100644 --- a/inc/framework/common/debug/ge_log.h +++ b/inc/framework/common/debug/ge_log.h @@ -38,75 +38,53 @@ extern "C" { enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; class GeLog { -public: + public: + static uint64_t GetTid() { #ifdef __GNUC__ -static pid_t GetTid() { - thread_local static pid_t tid = syscall(__NR_gettid); - return tid; -} + thread_local static uint64_t tid = static_cast(syscall(__NR_gettid)); #else -static int GetTid() { - thread_local static int tid = static_cast(GetCurrentThreadId()); - return tid; -} + thread_local static uint64_t tid = static_cast(GetCurrentThreadId()); #endif + return tid; + } }; inline bool IsLogEnable(int module_name, int log_level) { int32_t enable = CheckLogLevel(module_name, log_level); // 1:enable, 0:disable - if (enable == 1) { - return true; - } - return false; + return (enable == 1); } -#define GELOGE(ERROR_CODE, fmt, ...) \ +#define GELOGE(ERROR_CODE, fmt, ...) \ dlog_error(GE_MODULE_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GELOGW(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGI(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGD(fmt, ...) \ - if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGW(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_WARN)) \ + dlog_warn(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGI(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) \ + dlog_info(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) +#define GELOGD(fmt, ...) \ + if (IsLogEnable(GE_MODULE_NAME, DLOG_DEBUG)) \ + dlog_debug(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) + #define GEEVENT(fmt, ...) dlog_event(GE_MODULE_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGO(fmt, ...) \ - Dlog(GE_MODULE_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GELOGT(VALUE, fmt, ...) \ - do { \ - TraceStatus stat = VALUE; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ + +#define GELOGT(VALUE, fmt, ...) \ + do { \ + TraceStatus stat = VALUE; \ + const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ + int idx = static_cast(stat); \ + char *k = const_cast("status"); \ + char *v = const_cast(TraceStatStr[idx]); \ + KeyValue kv = {k, v}; \ + DlogWithKV(static_cast(GE_MODULE_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, \ + ##__VA_ARGS__); \ } while (0) -#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ +#define GE_LOG_ERROR(MOD_NAME, ERROR_CODE, fmt, ...) \ dlog_error(MOD_NAME, "%lu %s: ErrorNo: %d(%s) " fmt, GeLog::GetTid(), __FUNCTION__, ERROR_CODE, \ ((GE_GET_ERRORNO_STR(ERROR_CODE)).c_str()), ##__VA_ARGS__) -#define GE_LOG_WARN(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_WARN)) dlog_warn(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_INFO(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_INFO)) dlog_info(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_DEBUG(MOD_NAME, fmt, ...) \ - if (IsLogEnable(MOD_NAME, DLOG_DEBUG)) dlog_debug(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_EVENT(MOD_NAME, fmt, ...) dlog_event(MOD_NAME, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) -#define GE_LOG_OPLOG(MOD_NAME, fmt, ...) \ - Dlog(MOD_NAME, DLOG_OPLOG, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__) - -#define GE_LOG_TRACE(MOD_NAME, value, fmt, ...) \ - do { \ - TraceStatus stat = value; \ - const char *const TraceStatStr[] = {"INIT", "RUNNING", "WAITING", "STOP"}; \ - int idx = static_cast(stat); \ - char *k = const_cast("status"); \ - char *v = const_cast(TraceStatStr[idx]); \ - KeyValue kv = {k, v}; \ - DlogWithKV(static_cast(MOD_NAME), DLOG_TRACE, &kv, 1, "%lu %s:" fmt, GeLog::GetTid(), __FUNCTION__, ##__VA_ARGS__); \ - } while (0) // print memory when it is greater than 1KB. #define GE_PRINT_DYNAMIC_MEMORY(FUNC, PURPOSE, SIZE) \ diff --git a/metadef b/metadef index bb864122..8ab60be2 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit bb86412204fc72fa8fe4063e6044090dfd714321 +Subproject commit 8ab60be2870b80b1ec952bb21c7f05ae2a624984 diff --git a/parser b/parser index d85b5fc6..98f17f4a 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit d85b5fc685b9e1f8dbee778c9c7b3ab6f379af79 +Subproject commit 98f17f4a2a37f283797858eabefa9dba1d06a66b diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index abff433c..a1ec8248 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -683,7 +683,7 @@ set(MULTI_PARTS_TEST_FILES "common/format_transfer_nchw_fractalz_unittest.cc" "common/format_transfer_hwcn_fractalz_unittest.cc" "common/format_transfer_nhwc_fractalz_unittest.cc" - #"common/format_transfer_fractal_nz_unittest.cc" + "common/format_transfer_fractal_nz_unittest.cc" "common/format_transfer_fractal_zz_unittest.cc" "common/format_transfer_nhwc_5d_unittest.cc" "common/format_transfer_5d_nchw_unittest.cc" diff --git a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc index 6e5158df..b0a39396 100644 --- a/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc +++ b/tests/ut/ge/common/format_transfer_5d_nhwc_unittest.cc @@ -679,7 +679,7 @@ TEST_F(UtestFormatTransfer5dNhwc, nc1hwc0_to_nhwc_float2) { } Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransfer5dNhwc, invalid_src_format) { diff --git a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc index e809cf1b..3f195ef2 100644 --- a/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc +++ b/tests/ut/ge/common/format_transfer_c1hwncoc0_hwcn_unittest.cc @@ -158,7 +158,7 @@ TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_fp16_success_lt_cube) { } Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferC1hwncoc0Hwcn, sixd_to_hwcn_gp16_success_eq_cube) { diff --git a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc index fe3dd452..70c07d45 100644 --- a/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc +++ b/tests/ut/ge/common/format_transfer_fractal_nz_unittest.cc @@ -249,8 +249,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape1_uint8_3) { } */ - -TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) { +/*TEST_F(UtestFormatTransferNdFractNz, nd_shape2_uint8_1) { uint8_t data[32 * 32] = { 47, 78, 47, 180, 246, 76, 157, 127, 63, 0, 168, 23, 148, 198, 180, 190, 43, 187, 76, 67, 77, 246, 11, 149, 240, 236, 136, 123, 51, 95, 7, 163, 163, 64, 157, 230, 247, 122, 67, 106, 150, 20, 231, 118, 43, 208, @@ -2157,7 +2156,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape3_fp16) { for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { EXPECT_EQ((reinterpret_cast(result2.data.get()))[i], data[i]); } -} +}*/ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) { uint16_t data[2 * 2 * 17 * 4] = { @@ -2333,7 +2332,7 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp16) { } EXPECT_EQ( transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), - UNSUPPORTED); + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferNdFractNz, nd_shape5_fp16) { @@ -4785,6 +4784,8 @@ TEST_F(UtestFormatTransferNdFractNz, nd_shape4_fp32) { for (int i = 0; i < sizeof(data) / sizeof(data[0]); ++i) { EXPECT_EQ((reinterpret_cast(result2.data.get()))[i], data[i]); } + EXPECT_EQ(transfer2.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferNdFractNz, nchw_shape4_fp32) { @@ -9059,7 +9060,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_shape) { FormatTransferFractalNz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); } TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) { @@ -9079,7 +9080,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type) { FormatTransferFractalNz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); + ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); } TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) { @@ -9094,8 +9095,7 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_format) { FormatTransferFractalNz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); - EXPECT_EQ(TransFormat(args, result), UNSUPPORTED); + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); } TEST_F(UtestFormatTransferNdFractNz, invalid_dst_shape) { @@ -9136,6 +9136,24 @@ TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type2) { EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); } +TEST_F(UtestFormatTransferNdFractNz, invalid_src_data_type3) { + uint16_t data[1 * 1 * 1 * 16 * 16] = {0}; + TransArgs args{reinterpret_cast(data), + FORMAT_FRACTAL_NZ, + FORMAT_NHWC, + {1, 1, 1, 16, 16}, + { + 1, + 1, + 4, + 4, + }, + DT_VARIANT}; + TransResult result; + FormatTransferFractalNzND transfer; + EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); +} + TEST_F(UtestFormatTransferNdFractNz, invalid_dst_format2) { uint16_t data[1 * 1 * 1 * 1 * 16 * 16] = {0}; TransArgs args{reinterpret_cast(data), diff --git a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc index 6278b958..8b1afa24 100644 --- a/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc +++ b/tests/ut/ge/common/format_transfer_fractal_zz_unittest.cc @@ -1894,7 +1894,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16_1) { } EXPECT_EQ( transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), - UNSUPPORTED); + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) { @@ -2071,7 +2071,7 @@ TEST_F(UtestFormatTransferNdFractZz, nd_shape4_fp16) { } EXPECT_EQ( transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), - UNSUPPORTED); + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferNdFractZz, nd_shape5_fp16) { @@ -7879,7 +7879,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_shape) { FormatTransferFractalZz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); } TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) { @@ -7899,7 +7899,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_data_type) { FormatTransferFractalZz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); + ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); } TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) { @@ -7914,7 +7914,7 @@ TEST_F(UtestFormatTransferNdFractZz, invalid_src_format) { FormatTransferFractalZz transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), - PARAM_INVALID); + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); EXPECT_EQ(TransFormat(args, result), UNSUPPORTED); } diff --git a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc index 6c18aa34..25caa741 100644 --- a/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc +++ b/tests/ut/ge/common/format_transfer_fracz_hwcn_unittest.cc @@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_eq_cube) { } Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferFracZHwcn, fracz_to_hwcn_fp16_success_gt_cube) { diff --git a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc index 46d3ae86..93160070 100644 --- a/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc +++ b/tests/ut/ge/common/format_transfer_fracz_nchw_unittest.cc @@ -302,7 +302,7 @@ TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_eq_cube) { } Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferFraczNchw, fracz_to_nchw_fp16_success_gt_cube) { diff --git a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc index e468f5ac..1e6b90dd 100644 --- a/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc +++ b/tests/ut/ge/common/format_transfer_hwcn_c1hwncoc0_unittest.cc @@ -75,7 +75,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_format_nchw) { EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format_nc1khkwhwc0) { @@ -142,7 +142,7 @@ TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_src_shape3) { EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, PARAM_INVALID); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); } TEST_F(UtestFormatTransferHwcnC1hwncoc0, hwcn_to_6d_invalid_dst_format) { diff --git a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc index 67104bf8..610bd7d3 100644 --- a/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc +++ b/tests/ut/ge/common/format_transfer_nchw_5d_unittest.cc @@ -633,5 +633,14 @@ TEST_F(UtestFormatTransferNchw5d, unsupport_dst_format) { TransResult result; EXPECT_NE(transfer.TransFormat(args, result), SUCCESS); } + +TEST_F(UtestFormatTransferNchw5d, invalid_data_format) { + uint16_t data[1 * 4 * 4 * 1] = {0}; + TransArgs args{ + reinterpret_cast(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferNchwNc1hwc0 transfer; + EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); +} } // namespace formats } // namespace ge diff --git a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc index 0944afd7..bc5a8754 100644 --- a/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc +++ b/tests/ut/ge/common/format_transfer_nhwc_5d_unittest.cc @@ -719,7 +719,7 @@ TEST_F(UtestFormatTransferNhwc5d, invalid_src_format) { EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); Status status = transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape); - EXPECT_EQ(status, UNSUPPORTED); + EXPECT_EQ(status, ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); } TEST_F(UtestFormatTransferNhwc5d, invalid_dst_shape2) { @@ -751,5 +751,20 @@ TEST_F(UtestFormatTransferNhwc5d, unsupport_dst_format) { FormatTransferNhwcNc1hwc0 transfer; EXPECT_EQ(transfer.TransFormat(args, result), PARAM_INVALID); } + +TEST_F(UtestFormatTransferNhwc5d, invalid_data_shape) { + uint16_t data[1 * 4 * 4 * 1] = {0}; + TransArgs args{ + reinterpret_cast(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferNhwcNc1hwc0 transfer; + EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); + + TransArgs args2{ + reinterpret_cast(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_STRING}; + FormatTransferNhwcNc1hwc0 transfer2; + EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); +} } // namespace formats } // namespace ge diff --git a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc index f6017fb7..a6dfffb0 100644 --- a/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc +++ b/tests/ut/ge/common/format_transfer_nhwc_fractalz_unittest.cc @@ -5353,5 +5353,44 @@ TEST_F(UtestFormatTransferNhwcFz, build_transfer_uint8) { auto transfer = BuildFormatTransfer(args); EXPECT_NE(transfer, nullptr); } + +TEST_F(UtestFormatTransferNhwcFz, invalid_data_type) { + uint16_t data[1 * 4 * 4 * 1] = {0}; + TransArgs args{ + reinterpret_cast(data), FORMAT_NHWC, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_VARIANT}; + FormatTransferFractalZ transfer; + EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID); +} + +TEST_F(UtestFormatTransferNhwcFz, invalid_data_format) { + uint16_t data[1 * 4 * 4 * 1] = {0}; + TransArgs args{ + reinterpret_cast(data), FORMAT_CHWN, FORMAT_FRACTAL_NZ, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferFractalZ transfer; + EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); +} + +TEST_F(UtestFormatTransferNhwcFz, invalid_data_shape) { + uint16_t data[1 * 4 * 4 * 1] = {0}; + TransArgs args{ + reinterpret_cast(data), FORMAT_NHWC, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferFractalZ transfer; + EXPECT_EQ(transfer.TransShape(args.src_format, args.src_shape, args.src_data_type, args.dst_format, args.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); + + TransArgs args2{ + reinterpret_cast(data), FORMAT_HWCN, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferFractalZ transfer2; + EXPECT_EQ(transfer2.TransShape(args2.src_format, args2.src_shape, args2.src_data_type, args2.dst_format, args2.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); + + TransArgs args3{ + reinterpret_cast(data), FORMAT_NCHW, FORMAT_FRACTAL_Z, {1, 4, 4}, {1, 1, 1, 16, 16}, DT_FLOAT16}; + FormatTransferFractalZ transfer3; + EXPECT_EQ(transfer3.TransShape(args3.src_format, args3.src_shape, args3.src_data_type, args3.dst_format, args3.dst_shape), + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); +} } // namespace formats } // namespace ge diff --git a/tests/ut/ge/common/format_transfer_transpose_unittest.cc b/tests/ut/ge/common/format_transfer_transpose_unittest.cc index 258b77fc..d56e06c0 100644 --- a/tests/ut/ge/common/format_transfer_transpose_unittest.cc +++ b/tests/ut/ge/common/format_transfer_transpose_unittest.cc @@ -4654,5 +4654,27 @@ TEST_F(UtestFormatTranspose, chwn_to_hwcn2) { EXPECT_EQ((reinterpret_cast(result.data.get()))[i], ret[i]); } } + +TEST_F(UtestFormatTranspose, invalid_data_shape) { + FormatTransferTranspose transfer; + std::vector dst_shape; + EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, std::vector({}), DT_FLOAT16, FORMAT_HWCN, dst_shape), + ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID); +} + +TEST_F(UtestFormatTranspose, invalid_src_format) { + FormatTransferTranspose transfer; + std::vector dst_shape; + EXPECT_EQ(transfer.TransShape(FORMAT_NC1HWC0, std::vector({1, 3, 8, 8}), DT_FLOAT16, FORMAT_HWCN, dst_shape), + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); +} + +TEST_F(UtestFormatTranspose, invalid_dst_format) { + FormatTransferTranspose transfer; + std::vector dst_shape; + std::vector src_shape; + EXPECT_EQ(transfer.TransShape(FORMAT_NCHW, src_shape, DT_FLOAT16, FORMAT_C1HWNC0, dst_shape), + ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID); +} } // namespace formats } // namespace ge diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index b803b624..47968345 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -46,7 +46,7 @@ class UtestDavinciModel : public testing::Test { } }; -TEST_F(UtestDavinciModel, init_success) { +/*TEST_F(UtestDavinciModel, init_success) { DavinciModel model(0, nullptr); ComputeGraphPtr graph = make_shared("default"); ProfilingManager::Instance().is_load_profiling_ = true; @@ -130,7 +130,7 @@ TEST_F(UtestDavinciModel, init_success) { EXPECT_EQ(outputs.size(), 1); ProfilingManager::Instance().is_load_profiling_ = false; -} +}*/ TEST_F(UtestDavinciModel, init_data_op) { DavinciModel model(0, nullptr);