From f826880f7502e8bbf1ad96eca53d4882c9cdd355 Mon Sep 17 00:00:00 2001
From: wjm
Date: Tue, 8 Jun 2021 04:28:41 +0800
Subject: [PATCH 01/51] fix sc

---
 ge/graph/preprocess/graph_preprocess.cc            | 119 ++++++++++++---------
 ge/graph/preprocess/graph_preprocess.h             |   3 +-
 ge/ir_build/ge_ir_build.cc                         |   2 +-
 .../graph/preprocess/graph_preprocess_unittest.cc  |  15 +++
 4 files changed, 84 insertions(+), 55 deletions(-)

diff --git a/ge/graph/preprocess/graph_preprocess.cc b/ge/graph/preprocess/graph_preprocess.cc
index 0c4adeea..a73c6a96 100644
--- a/ge/graph/preprocess/graph_preprocess.cc
+++ b/ge/graph/preprocess/graph_preprocess.cc
@@ -1420,9 +1420,10 @@ Status GraphPrepare::AdjustDataOpOutput(const NodePtr &node) {
   return SUCCESS;
 }
 
-Status GraphPrepare::CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc, bool tune_flag) {
+Status GraphPrepare::CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc) {
   auto format = desc.GetFormat();
   auto origin_format = desc.GetOriginFormat();
+  auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER);
   bool need_check_internal_format = (!IsTansDataOpData(input_node)) && (!options_.is_single_op) && (!tune_flag);
   if (need_check_internal_format) {
     bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
@@ -1439,6 +1440,63 @@ Status GraphPrepare::CheckInternalFormat(const NodePtr &input_node, const GeTens
   return SUCCESS;
 }
 
+Status GraphPrepare::UpdateDataInputOutputDesc(GeAttrValue::INT index, OpDescPtr &op, GeTensorDesc &desc) {
+  auto data_type = desc.GetDataType();
+  uint32_t length = 1;
+  bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
+  if (!type_ret) {
+    std::string reason = "Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "] of index:" +
+                         std::to_string(index) + " input tensor is not support";
+    REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"reason"}), std::vector<std::string>({reason}));
+    GELOGE(PARAM_INVALID, "[Check][Param] Input datatype %s is not support.",
+           TypeUtils::DataTypeToSerialString(data_type).c_str());
+    return FAILED;
+  }
+  int64_t desc_shape = desc.GetShape().GetShapeSize();
+  FMK_INT64_UINT32_MULCHECK(desc_shape, length);
+  int64_t shape_size = desc_shape * length;
+  GE_IF_BOOL_EXEC(shape_size == 0 && desc.GetShape().GetDimNum() == 0, shape_size = static_cast<int64_t>(length));
+  int64_t size = 0;
+  GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc, size) != GRAPH_SUCCESS,
+                  REPORT_CALL_ERROR("E19999", "Get size of user input tensor failed, index:%ld", index);
+                  GELOGE(INTERNAL_ERROR, "[Get][Size] of user input tensor failed, index:%ld", index); return FAILED);
+  bool size_check = (size != 0 && shape_size != size);
+  if (size_check) {
+    std::string reason = "input tensor[index:" + std::to_string(index) + "]'s data size[" + std::to_string(size) +
+                         "] != shape_size[" + std::to_string(shape_size) + "], check invalid";
+    REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"reason"}), std::vector<std::string>({reason}));
+    GELOGE(PARAM_INVALID, "[Check][Param] input data size = %ld, shape_size = %ld.", size, shape_size);
+    return FAILED;
+  }
+  ge::TensorUtils::SetSize(desc, shape_size);
+
+  auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER);
+  if (!tune_flag) {
+    graphStatus graph_ret = op->UpdateInputDesc(0, desc);
+    if (graph_ret != GRAPH_SUCCESS) {
+      REPORT_CALL_ERROR("E19999", "Update input desc of op:%s(%s) failed, index:0",
+                        op->GetName().c_str(), op->GetType().c_str());
+      GELOGE(graph_ret, "[Update][InputDesc] of op:%s(%s) failed, index:0",
+             op->GetName().c_str(), op->GetType().c_str());
+      return graph_ret;
+    }
+    // Size will be recalculated in the build stage
+    ge::TensorUtils::SetSize(desc, 0);
+    graph_ret = op->UpdateOutputDesc(0, desc);
+    if (graph_ret != GRAPH_SUCCESS) {
+      REPORT_CALL_ERROR("E19999", "Update output desc of op:%s(%s) failed, index:0",
+                        op->GetName().c_str(), op->GetType().c_str());
+      GELOGE(graph_ret, "[Update][OutputDesc] of op:%s(%s) failed, index:0",
+             op->GetName().c_str(), op->GetType().c_str());
+      return graph_ret;
+    }
+  } else {
+    GELOGI("data %s skip update info in tune mode", op->GetName().c_str());
+  }
+
+  return SUCCESS;
+}
+
 Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
                                  const std::map<std::string, std::string> &graph_option) {
   // Get shape range of input in dynamic_execute mode
@@ -1471,63 +1529,18 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input,
     }
     GeTensorDesc desc(user_input[index].GetTensorDesc());
     // data maybe internal format [FRACTAL_NZ] at singleop process such as GEMM.
-    auto tune_flag = (options_.build_mode == BUILD_MODE_TUNING) && (options_.build_step == BUILD_STEP_AFTER_BUILDER);
-    ret = CheckInternalFormat(input_node, desc, tune_flag);
+    ret = CheckInternalFormat(input_node, desc);
     if (ret != SUCCESS) {
       GELOGE(INTERNAL_ERROR, "[Check][InternalFormat] on %s failed", op->GetName().c_str());
       return ret;
     }
-    auto data_type = desc.GetDataType();
-    uint32_t length = 1;
-    bool type_ret = TypeUtils::GetDataTypeLength(data_type, length);
-    if (!type_ret) {
-      std::string reason = "Input datatype[" + TypeUtils::DataTypeToSerialString(data_type) + "] of index:" +
-                           std::to_string(index) + " input tensor is not support";
-      REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"reason"}), std::vector<std::string>({reason}));
-      GELOGE(PARAM_INVALID, "[Check][Param] Input datatype %s is not support.",
-             TypeUtils::DataTypeToSerialString(data_type).c_str());
-      return FAILED;
-    }
-    int64_t desc_shape = desc.GetShape().GetShapeSize();
-    FMK_INT64_UINT32_MULCHECK(desc_shape, length);
-    int64_t shape_size = desc_shape * length;
-    GE_IF_BOOL_EXEC(shape_size == 0 && desc.GetShape().GetDimNum() == 0, shape_size = static_cast<int64_t>(length));
-    int64_t size = 0;
-    GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(desc, size) != GRAPH_SUCCESS,
-                    REPORT_CALL_ERROR("E19999", "Get size of user input tensor failed, index:%ld", index);
-                    GELOGE(INTERNAL_ERROR, "[Get][Size] of user input tensor failed, index:%ld", index);
-                    return FAILED);
-    bool size_check = (size != 0 && shape_size != size);
-    if (size_check) {
-      std::string reason = "input tensor[index:" + std::to_string(index) + "]'s data size[" + std::to_string(size) +
-                           "] != shape_size[" + std::to_string(size) + "], check invalid";
-      REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"reason"}), std::vector<std::string>({reason}));
-      GELOGE(PARAM_INVALID, "[Check][Param] input data size = %ld, shape_size = %ld.", size, shape_size);
-      return FAILED;
-    }
-    ge::TensorUtils::SetSize(desc, shape_size);
-    if (!tune_flag) {
-      graphStatus graph_ret = op->UpdateInputDesc(0, desc);
-      if (graph_ret != GRAPH_SUCCESS) {
-        REPORT_CALL_ERROR("E19999", "Update input desc of op:%s(%s) failed, index:0",
-                          op->GetName().c_str(), op->GetType().c_str());
-        GELOGE(graph_ret, "[Update][InputDesc] of op:%s(%s) failed, index:0",
-               op->GetName().c_str(), op->GetType().c_str());
-        return graph_ret;
-      }
-      // Size will be recalculated in the build stage
-      ge::TensorUtils::SetSize(desc, 0);
-      graph_ret = op->UpdateOutputDesc(0, desc);
-      if (graph_ret != GRAPH_SUCCESS) {
-        REPORT_CALL_ERROR("E19999", "Update output desc of op:%s(%s) failed, index:0",
-                          op->GetName().c_str(), op->GetType().c_str());
-        GELOGE(graph_ret, "[Update][OutputDesc] of op:%s(%s) failed, index:0",
-               op->GetName().c_str(), op->GetType().c_str());
-        return graph_ret;
-      }
-    } else {
-      GELOGI("data %s skip update info in tune mode", op->GetName().c_str());
+
+    ret = UpdateDataInputOutputDesc(index, op, desc);
+    if (ret != SUCCESS) {
+      GELOGE(FAILED, "[Update][DataInputOutputDesc] on %s failed", op->GetName().c_str());
+      return ret;
     }
+
     if (!dynamic_shape_range_vec.empty()) {
       ret = UpdateDynamicInputShapeRange(index, dynamic_shape_range_vec, op, desc);
       GE_CHK_STATUS_RET(ret, "[Update][DynamicInputShapeRange] on %s failed.", op->GetName().c_str());
diff --git a/ge/graph/preprocess/graph_preprocess.h b/ge/graph/preprocess/graph_preprocess.h
index 584f4d16..22bc566c 100755
--- a/ge/graph/preprocess/graph_preprocess.h
+++ b/ge/graph/preprocess/graph_preprocess.h
@@ -63,7 +63,8 @@ class GraphPrepare {
   Status CheckRefOp();
   Status SetRtContext(rtContext_t rt_context, rtCtxMode_t mode);
   Status AdjustDataOpOutput(const NodePtr &node);
-  Status CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc, bool tune_flag);
+  Status CheckInternalFormat(const NodePtr &input_node, const GeTensorDesc &desc);
+  Status UpdateDataInputOutputDesc(GeAttrValue::INT index, OpDescPtr &op, GeTensorDesc &desc);
   Status UpdateInput(const std::vector<GeTensor> &user_input, const std::map<std::string, std::string> &graph_option);
   Status CheckAndUpdateInput(const std::vector<GeTensor> &user_input, const std::map<std::string, std::string> &graph_option);
   Status CheckConstOp();
diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc
index 21db83aa..befffa93 100644
--- a/ge/ir_build/ge_ir_build.cc
+++ b/ge/ir_build/ge_ir_build.cc
@@ -559,8 +559,8 @@ graphStatus Impl::Init(const Graph &graph, const std::map user_input = {input1};
+  std::map<std::string, std::string> graph_option;
+  auto ret = graph_prepare.UpdateInput(user_input, graph_option);
+  EXPECT_EQ(ret, ge::FAILED);
+}
+
 TEST_F(UtestGraphPreproces, test_check_user_input) {
   ge::GraphPrepare graph_prepare;
   graph_prepare.compute_graph_ = BuildGraph1();
From d8ba1fb2c0d85436d43d5b0fe132a8c2e2d1724d Mon Sep 17 00:00:00 2001
From: zhengyuanhua
Date: Fri, 11 Jun 2021 09:42:50 +0800
Subject: [PATCH 03/51] remove graph ut from ge

---
 cmake/external_libs/protobuf_shared.cmake          |  12 +--
 cmake/external_libs/protobuf_static.cmake          |  10 +-
 cmake/external_libs/protoc.cmake                   |  12 +--
 metadef                                            |   2 +-
 parser                                             |   2 +-
 .../testcase/ge_graph/ge_graph_anchor_unittest.cc  | 112 ---------------------
 .../ge_graph/ge_model_serialize_unittest.cc        |   3 +-
 .../graph/testcase/ge_graph/ge_tensor_unittest.cc  |  18 +---
 tests/ut/ge/CMakeLists.txt                         |   1 +
 .../partition/dynamic_shape_partition_unittest.cc  |   5 +-
 tests/ut/ge/hybrid/ge_hybrid_unittest.cc           |   3 +-
 11 files changed, 26 insertions(+), 154 deletions(-)

diff --git a/cmake/external_libs/protobuf_shared.cmake b/cmake/external_libs/protobuf_shared.cmake
index 6334c8a3..dfdb0606 100755
--- a/cmake/external_libs/protobuf_shared.cmake
+++ b/cmake/external_libs/protobuf_shared.cmake
@@ -11,14 +11,14 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
     message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
 endif()
 if (GE_PB_PKG)
-    set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz")
+    set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.13.0.tar.gz")
 else()
     if (ENABLE_GITEE)
-        set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
-        set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
+        set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.13.0.tar.gz")
+        set(MD5 "f4489cb88922ad9c58cbe3308d59cee5")
     else()
-        set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
-        set(MD5 "3d9e32700639618a4d2d342c99d4507a")
+        set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.13.0.tar.gz")
+        set(MD5 "1a6274bc4a65b55a6fa70e264d796490")
     endif ()
 endif()
@@ -58,7 +58,7 @@ target_include_directories(ascend_protobuf INTERFACE ${PROTOBUF_SHARED_PKG_DIR}/
 set(INSTALL_BASE_DIR "")
 set(INSTALL_LIBRARY_DIR lib)
 
-install(FILES ${PROTOBUF_SHARED_PKG_DIR}/${CMAKE_INSTALL_LIBDIR}/ascend_protobuf.so.3.8.0.0 OPTIONAL
+install(FILES ${PROTOBUF_SHARED_PKG_DIR}/${CMAKE_INSTALL_LIBDIR}/ascend_protobuf.so.3.13.0.0 OPTIONAL
         DESTINATION ${INSTALL_LIBRARY_DIR})
 install(FILES ${PROTOBUF_SHARED_PKG_DIR}/${CMAKE_INSTALL_LIBDIR}/ascend_protobuf.so OPTIONAL
         DESTINATION ${INSTALL_LIBRARY_DIR})
diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake
index
22f537cf..b8ff90bb 100755 --- a/cmake/external_libs/protobuf_static.cmake +++ b/cmake/external_libs/protobuf_static.cmake @@ -16,11 +16,11 @@ if(GE_PB_PKG) set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") else() if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") - set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.13.0.tar.gz") + set(MD5 "f4489cb88922ad9c58cbe3308d59cee5") else() - set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") - set(MD5 "3d9e32700639618a4d2d342c99d4507a") + set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.13.0.tar.gz") + set(MD5 "1a6274bc4a65b55a6fa70e264d796490") endif () endif() @@ -29,8 +29,6 @@ set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") set(PROTOBUF_STATIC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protobuf_static) ExternalProject_Add(protobuf_static_build URL ${REQ_URL} - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz - #SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0 TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} diff --git a/cmake/external_libs/protoc.cmake b/cmake/external_libs/protoc.cmake index 421f2632..f16f5e22 100755 --- a/cmake/external_libs/protoc.cmake +++ b/cmake/external_libs/protoc.cmake @@ -13,14 +13,14 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR endif() if(GE_PB_PKG) - set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") + set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.13.0.tar.gz") else() if (ENABLE_GITEE) - set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz") - set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236") + set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.13.0.tar.gz") + set(MD5 "f4489cb88922ad9c58cbe3308d59cee5") else() - set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz") - set(MD5 "3d9e32700639618a4d2d342c99d4507a") + set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.13.0.tar.gz") + set(MD5 "1a6274bc4a65b55a6fa70e264d796490") endif () endif() @@ -28,8 +28,6 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack") ExternalProject_Add(protoc_build URL ${REQ_URL} - #URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz - #SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 TLS_VERIFY OFF CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc /cmake BUILD_COMMAND $(MAKE) diff --git a/metadef b/metadef index b27915cd..c6030152 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit b27915cd37919430a61953f8998b7acce4a60177 +Subproject commit c6030152c6dc05515115765babb5d64fde649df4 diff --git a/parser b/parser index e75eda62..155d3262 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit e75eda62de2b51a0bded5481ca81eb8fc7bf376e +Subproject commit 155d3262ba17f800094abb58b6a809b041cf0a74 diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_graph_anchor_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_graph_anchor_unittest.cc index 5cf7569b..85328b27 100644 --- a/tests/ut/common/graph/testcase/ge_graph/ge_graph_anchor_unittest.cc +++ 
b/tests/ut/common/graph/testcase/ge_graph/ge_graph_anchor_unittest.cc
@@ -272,115 +272,3 @@ TEST_F(UtestGeAnchor, graph_utils_test) {
   EXPECT_EQ(GraphUtils::RemoveEdge(conv_node->GetOutDataAnchor(0), bn_node->GetInControlAnchor()), GRAPH_SUCCESS);
   EXPECT_EQ(GraphUtils::RemoveEdge(conv_node->GetOutDataAnchor(0), bn_node->GetInControlAnchor()), GRAPH_FAILED);
 }
-
-TEST_F(UtestGeAnchor, data_anchor_replace_peer) {
-  ComputeGraphPtr graph_ptr = std::make_shared<ComputeGraph>("graph");
-  OpDescPtr in_op_ptr = std::make_shared<OpDesc>("in_op_1", "float");
-  in_op_ptr->AddInputDesc("x1", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddInputDesc("x2", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddInputDesc("x3", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y1", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y2", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y3", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  NodePtr node1 = graph_ptr->AddNode(in_op_ptr);
-  NodePtr node2 = graph_ptr->AddNode(in_op_ptr);
-  NodePtr node3 = graph_ptr->AddNode(in_op_ptr);
-
-  OutDataAnchorPtr out_data_anchor = node1->GetOutDataAnchor(1);
-  InDataAnchorPtr in_data_anchor = node2->GetInDataAnchor(1);
-  EXPECT_EQ(out_data_anchor != nullptr, true);
-  EXPECT_EQ(in_data_anchor != nullptr, true);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(0)), GRAPH_SUCCESS);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(1)), GRAPH_SUCCESS);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(2)), GRAPH_SUCCESS);
-
-  size_t out_idx = 0;
-  for (; out_idx < out_data_anchor->peer_anchors_.size(); out_idx++) {
-    if (out_data_anchor->peer_anchors_[out_idx].lock() == in_data_anchor) {
-      break;
-    }
-  }
-  EXPECT_EQ(out_idx, 1);
-
-  size_t in_idx = 0;
-  for (; in_idx < in_data_anchor->peer_anchors_.size(); in_idx++) {
-    if (in_data_anchor->peer_anchors_[in_idx].lock() == out_data_anchor) {
-      break;
-    }
-  }
-  EXPECT_EQ(in_idx, 0);
-
-  out_data_anchor->ReplacePeer(in_data_anchor, node3->GetInDataAnchor(1), node3->GetOutDataAnchor(1));
-
-  size_t out_idx1 = 0;
-  for (; out_idx1 < out_data_anchor->peer_anchors_.size(); out_idx1++) {
-    if (out_data_anchor->peer_anchors_[out_idx1].lock() == node3->GetInDataAnchor(1)) {
-      break;
-    }
-  }
-  EXPECT_EQ(out_idx1, out_idx);
-
-  size_t in_idx1 = 0;
-  for (; in_idx1 < in_data_anchor->peer_anchors_.size(); in_idx1++) {
-    if (in_data_anchor->peer_anchors_[in_idx1].lock() == node3->GetOutDataAnchor(1)) {
-      break;
-    }
-  }
-  EXPECT_EQ(in_idx1, in_idx);
-}
-
-TEST_F(UtestGeAnchor, graph_utils_insert_node) {
-  ComputeGraphPtr graph_ptr = std::make_shared<ComputeGraph>("graph");
-  OpDescPtr in_op_ptr = std::make_shared<OpDesc>("in_op_1", "float");
-  in_op_ptr->AddInputDesc("x1", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddInputDesc("x2", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddInputDesc("x3", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y1", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y2", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  in_op_ptr->AddOutputDesc("y3", GeTensorDesc(GeShape({1, 32, 8, 8}), FORMAT_NCHW));
-  NodePtr node1 = graph_ptr->AddNode(in_op_ptr);
-  NodePtr node2 = graph_ptr->AddNode(in_op_ptr);
-  NodePtr node3 = graph_ptr->AddNode(in_op_ptr);
-
-  OutDataAnchorPtr out_data_anchor = node1->GetOutDataAnchor(1);
-  InDataAnchorPtr in_data_anchor = node2->GetInDataAnchor(1);
-  EXPECT_EQ(out_data_anchor != nullptr, true);
-  EXPECT_EQ(in_data_anchor != nullptr, true);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(0)), GRAPH_SUCCESS);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(1)), GRAPH_SUCCESS);
-  EXPECT_EQ(node1->GetOutDataAnchor(1)->LinkTo(node2->GetInDataAnchor(2)), GRAPH_SUCCESS);
-
-  size_t out_idx = 0;
-  for (; out_idx < out_data_anchor->peer_anchors_.size(); out_idx++) {
-    if (out_data_anchor->peer_anchors_[out_idx].lock() == in_data_anchor) {
-      break;
-    }
-  }
-  EXPECT_EQ(out_idx, 1);
-
-  size_t in_idx = 0;
-  for (; in_idx < in_data_anchor->peer_anchors_.size(); in_idx++) {
-    if (in_data_anchor->peer_anchors_[in_idx].lock() == out_data_anchor) {
-      break;
-    }
-  }
-  EXPECT_EQ(in_idx, 0);
-
-  GraphUtils::InsertNodeBetweenDataAnchors(out_data_anchor, in_data_anchor, node3);
-
-  size_t out_idx1 = 0;
-  for (; out_idx1 < out_data_anchor->peer_anchors_.size(); out_idx1++) {
-    if (out_data_anchor->peer_anchors_[out_idx1].lock() == node3->GetInDataAnchor(0)) {
-      break;
-    }
-  }
-  EXPECT_EQ(out_idx1, out_idx);
-
-  size_t in_idx1 = 0;
-  for (; in_idx1 < in_data_anchor->peer_anchors_.size(); in_idx1++) {
-    if (in_data_anchor->peer_anchors_[in_idx1].lock() == node3->GetOutDataAnchor(0)) {
-      break;
-    }
-  }
-  EXPECT_EQ(in_idx1, in_idx);
-}
diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_model_serialize_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_model_serialize_unittest.cc
index 0366446c..c91f68df 100644
--- a/tests/ut/common/graph/testcase/ge_graph/ge_model_serialize_unittest.cc
+++ b/tests/ut/common/graph/testcase/ge_graph/ge_model_serialize_unittest.cc
@@ -30,6 +30,7 @@
 #include "graph/model_serialize.h"
 #include "graph/detail/model_serialize_imp.h"
+#include "graph/node_impl.h"
 #include "graph/ge_attr_value.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
@@ -1062,7 +1063,7 @@ TEST(UtestGeModelSerialize, test_model_serialize_imp_invalid_param) {
   auto graph = std::make_shared<ComputeGraph>("test_graph");
   auto node = graph->AddNode(std::make_shared<OpDesc>());
-  node->op_ = nullptr;
+  node->impl_->op_ = nullptr;
   ge::proto::ModelDef model_def;
   Model model;
   model.SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
diff --git a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc
index aa43ac99..838df735 100644
--- a/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc
+++ b/tests/ut/common/graph/testcase/ge_graph/ge_tensor_unittest.cc
@@ -25,6 +25,7 @@
 #include "graph/ge_attr_value.h"
 #include "graph/tensor.h"
 #include "graph/utils/tensor_utils.h"
+#include "graph/ge_tensor_impl.h"
 #undef private
 #undef protected
@@ -196,23 +197,6 @@ TEST_F(UtestGeTensor, test_shape_copy_move) {
   EXPECT_EQ(shape4.GetDimNum(), 3);
 }
-TEST_F(UtestGeTensor, test_tensor_desc_invalid_null) {
-  GeTensorDesc tensor_desc(nullptr, nullptr);
-  EXPECT_EQ(tensor_desc.GetDataType(), DT_UNDEFINED);
-  EXPECT_EQ(tensor_desc.GetFormat(), FORMAT_RESERVED);
-  EXPECT_EQ(tensor_desc.MutableShape().shape_def_.GetProtoMsg(), nullptr);
-
-  GeTensorDesc tensor_desc2;
-  EXPECT_EQ(tensor_desc2.GetDataType(), DT_FLOAT);
-  EXPECT_EQ(tensor_desc2.GetFormat(), FORMAT_ND);
-
-  tensor_desc2.SetDataType(DT_DUAL_SUB_INT8);
-  EXPECT_EQ(tensor_desc2.GetDataType(), DT_DUAL_SUB_INT8);
-
-  TensorUtils::SetWeightSize(tensor_desc, 100);
-  EXPECT_EQ(TensorUtils::GetWeightSize(tensor_desc), 0);
-}
-
 TEST_F(UtestGeTensor, test_tensor_invalid_null) {
   ProtoMsgOwner msg_owner;
   GeTensor tensor(msg_owner, nullptr);
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 63579109..0d1ae079 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -121,6 +121,7 @@ set(GRAPH_SRC_FILES
     "${GE_CODE_DIR}/metadef/register/op_tiling.cpp"
     "${GE_CODE_DIR}/metadef/graph/utils/tuning_utils.cc"
     "${GE_CODE_DIR}/metadef/register/op_tiling_registry.cpp"
+    "${GE_CODE_DIR}/metadef/register/op_tiling_registry_impl.cpp"
 )
 
 set(PARSER_SRC_FILES
diff --git a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc
index c8abadb5..ec1caebd 100644
--- a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc
+++ b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc
@@ -20,6 +20,7 @@
 #define protected public
 #include "graph/partition/dynamic_shape_partition.h"
 #include "compute_graph.h"
+#include "graph/compute_graph_impl.h"
 #include "inc/framework/common/types.h"
 #include "utils/graph_utils.h"
 #include "graph/debug/ge_attr_define.h"
@@ -111,9 +112,9 @@ TEST_F(UtestDynamicShapePartition, merge_control_flow_group) {
   (void)AttrUtils::SetBool(merge->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true);
   (void)AttrUtils::SetInt(merge->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3);
-  EXPECT_EQ(graph->sub_graph_.size(), 0);
+  EXPECT_EQ(graph->impl_->sub_graph_.size(), 0);
   DynamicShapePartitioner partitioner(graph);
   EXPECT_EQ(partitioner.Partition(), SUCCESS);
-  EXPECT_EQ(graph->sub_graph_.size(), 1);
+  EXPECT_EQ(graph->impl_->sub_graph_.size(), 1);
 }
 }  // namespace ge
\ No newline at end of file
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index 7a2a5dfe..f6c75d50 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -40,6 +40,7 @@
 #include "graph/types.h"
 #include "graph/utils/tensor_utils.h"
 #include "graph/testcase/ge_graph/graph_builder_utils.h"
+#include "graph/op_desc_impl.h"
 #undef private
 #undef protected
@@ -736,7 +737,7 @@ TEST_F(UtestGeHybrid, TestParseDependencies) {
   std::vector<std::string> deps;
   deps.push_back("Data");
   auto op_desc = netoutput->GetOpDesc();
-  op_desc->input_name_idx_["Data"] = 0;
+  op_desc->impl_->input_name_idx_["Data"] = 0;
   auto data_desc = data->GetOpDesc();
   auto tensor = std::make_shared<GeTensor>();
   auto tensor_desc = data_desc->MutableInputDesc(0);
From 4c3c819129d6b96e0b70b27de18474b9d7844691 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Sun, 13 Jun 2021 19:44:48 +0800
Subject: [PATCH 04/51] Optimize performance of single_op executor.
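This patch moves construction of the root-graph SubgraphExecutor out of the per-call
Execute() path: the executor is now built once in Init() and cached in the new
root_graph_executor_ member, so repeated single-op executions no longer pay the
construction cost. A minimal compilable sketch of that pattern (the two types below
are simplified stand-ins for the real GE classes, not the actual implementation):

    #include <memory>
    #include <new>

    struct ExecuteArgs {};

    // Stand-in: expensive to construct, cheap to run.
    struct SubgraphExecutor {
      SubgraphExecutor(void *graph_item, void *context) {}
      bool ExecuteAsync(ExecuteArgs &args) { return true; }
    };

    class HybridModelExecutor {
     public:
      // Build the executor once, up front.
      bool Init(void *root_graph_item, void *context) {
        root_graph_executor_.reset(new (std::nothrow) SubgraphExecutor(root_graph_item, context));
        return root_graph_executor_ != nullptr;
      }
      // Reuse the cached executor instead of constructing a local one per call.
      bool Execute(ExecuteArgs &args) { return root_graph_executor_->ExecuteAsync(args); }

     private:
      std::unique_ptr<SubgraphExecutor> root_graph_executor_;
    };

Because the executor is now created during Init(), the unit tests touched below set
root_graph_item_ on the model before calling Init()/ExecuteGraphInternal().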
--- ge/hybrid/executor/hybrid_model_executor.cc | 14 ++++---- ge/hybrid/executor/hybrid_model_executor.h | 3 +- ge/single_op/single_op_model.cc | 38 +++++++++++++++++++++- .../hybrid_model_async_executor_unittest.cc | 5 ++- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 1 + 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index d8939175..b3c2c471 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -41,6 +41,8 @@ HybridModelExecutor::~HybridModelExecutor() { Status HybridModelExecutor::Init() { GELOGD("Start to init HybridGraphEngine."); GE_CHK_STATUS_RET_NOLOG(InitExecutionContext()); + root_graph_executor_.reset(new (std::nothrow) SubgraphExecutor(model_->GetRootGraphItem(), &context_)); + GE_CHECK_NOTNULL(root_graph_executor_); GELOGD("HybridGraphEngine initialized successfully."); return SUCCESS; } @@ -60,8 +62,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); } - SubgraphExecutor executor(model_->GetRootGraphItem(), &context_); - auto ret = ExecuteGraphInternal(executor, args); + auto ret = ExecuteGraphInternal(args); Cleanup(); RECORD_MODEL_EXECUTION_EVENT(&context_, "[Cleanup] End"); GELOGD("Model executed successfully."); @@ -79,8 +80,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { return SUCCESS; } -Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, - HybridModelExecutor::ExecuteArgs &args) { +Status HybridModelExecutor::ExecuteGraphInternal(HybridModelExecutor::ExecuteArgs &args) { RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] Start"); GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); @@ -94,7 +94,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 0, stream_, device_id)); } - HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), + HYBRID_CHK_STATUS_RET(root_graph_executor_->ExecuteAsync(args.inputs, args.input_desc, args.outputs), "Failed to execute partitioned call."); RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); @@ -103,7 +103,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, } if (!model_->IsSingleOp()) { - Status ret = executor.Synchronize(); + Status ret = root_graph_executor_->Synchronize(); if (ret != ge::SUCCESS) { auto model_manager = ModelManager::GetInstance(); GE_CHECK_NOTNULL(model_manager); @@ -123,7 +123,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, } args.outputs.clear(); - HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); + HYBRID_CHK_STATUS_RET(root_graph_executor_->GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); return SUCCESS; } diff --git a/ge/hybrid/executor/hybrid_model_executor.h b/ge/hybrid/executor/hybrid_model_executor.h index 566043d9..102e4f8b 100644 --- a/ge/hybrid/executor/hybrid_model_executor.h +++ b/ge/hybrid/executor/hybrid_model_executor.h @@ -48,7 +48,7 @@ class HybridModelExecutor { Status Execute(ExecuteArgs &args); 
 private:
-  Status ExecuteGraphInternal(SubgraphExecutor &executor, ExecuteArgs &args);
+  Status ExecuteGraphInternal(ExecuteArgs &args);
   Status Cleanup();
   Status InitExecutionContext();
   static Status ResetExecutionContext(GraphExecutionContext &context);
@@ -58,6 +58,7 @@ class HybridModelExecutor {
   uint32_t device_id_;
   rtStream_t stream_;
   GraphExecutionContext context_;
+  std::unique_ptr<SubgraphExecutor> root_graph_executor_;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 67642f2e..3c0f7972 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -44,20 +44,56 @@ using std::vector;
 namespace ge {
 namespace {
 const size_t kDataOutputNum = 1;
+const uint32_t kInputIndexOfData = 0;
 const uint32_t kOutputIndexOfData = 0;
 constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 
+Status CheckHostMem(const std::vector<std::string> &dependencies, const NodePtr &node, bool &flag) {
+  for (const auto &input_name : dependencies) {
+    auto op_desc = node->GetOpDesc();
+    int input_index = op_desc->GetInputIndexByName(input_name);
+    if (input_index < 0) {
+      GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.",
+             node->GetName().c_str(), input_name.c_str());
+      REPORT_CALL_ERROR("E19999", "GetInputIndexByName failed, node:[%s] inputname: %s.",
+                        node->GetName().c_str(), input_name.c_str());
+      return INTERNAL_ERROR;
+    }
+
+    const auto &in_anchor = node->GetInDataAnchor(input_index);
+    GE_CHECK_NOTNULL(in_anchor);
+    const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor();
+    GE_CHECK_NOTNULL(peer_out_anchor);
+    const auto &src_node = peer_out_anchor->GetOwnerNode();
+    GE_CHECK_NOTNULL(src_node);
+    auto src_op_desc = src_node->GetOpDesc();
+    GE_CHECK_NOTNULL(src_op_desc);
+    if (src_op_desc->GetType() == DATA) {
+      auto tensor = src_op_desc->MutableInputDesc(kInputIndexOfData);
+      if (AttrUtils::HasAttr(tensor, ATTR_NAME_VALUE)) {
+        GELOGD("Get hostmem from node %s, inputname: %s.", src_node->GetName().c_str(), input_name.c_str());
+        continue;
+      }
+    }
+    flag = false;
+    return SUCCESS;
+  }
+  flag = true;
+  return SUCCESS;
+}
+
 Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
   auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
   GE_CHECK_NOTNULL(comp_graph);
   for (const auto &node : comp_graph->GetAllNodes()) {
+    GE_CHECK_NOTNULL(node);
     auto op_desc = node->GetOpDesc();
     GE_CHECK_NOTNULL(op_desc);
     const auto &depends = op_desc->GetOpInferDepends();
     bool support_dynamic_shape = false;
     (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
     if (!depends.empty() && support_dynamic_shape) {
-      flag = true;
+      CheckHostMem(depends, node, flag);
       return SUCCESS;
     }
   }
diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
index d2679439..52537ee2 100644
--- a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
+++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc
@@ -92,16 +92,15 @@ TEST_F(UtestHybridModelAsyncExecutor, Test_execute_internal) {
   GeRootModelPtr ge_root_model = make_shared<GeRootModel>(graph);
   ge_root_model->SetModelName("test_name");
   HybridModel hybrid_model(ge_root_model);
+  hybrid_model.root_graph_item_.reset(new GraphItem);
   HybridModelExecutor executor(&hybrid_model, 0, nullptr);
   ASSERT_EQ(executor.Init(), SUCCESS);
   auto &context = executor.context_;
-  GraphItem graph_item;
-  SubgraphExecutor subgraph_executor(&graph_item, &context);
   HybridModelExecutor::ExecuteArgs args;
   std::pair> eof_entry;
   eof_entry.first = nullptr;
   context.callback_manager->callback_queue_.Push(eof_entry);
-  ASSERT_EQ(executor.ExecuteGraphInternal(subgraph_executor, args), SUCCESS);
+  ASSERT_EQ(executor.ExecuteGraphInternal(args), SUCCESS);
 }
 }  // namespace ge
\ No newline at end of file
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index 7a2a5dfe..088aec50 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -330,6 +330,7 @@ TEST_F(UtestGeHybrid, hybrid_model_executor) {
   ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("abc");
   GeRootModelPtr root_model = MakeShared<GeRootModel>(compute_graph);
   HybridModel model(root_model);
+  model.root_graph_item_.reset(new GraphItem);
   HybridModel *model_ptr = &model;
 
   uint32_t device_id = 0;
From 13c98395e2c7c578375780afa4884887018d49a0 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Mon, 14 Jun 2021 20:09:00 +0800
Subject: [PATCH 05/51] Add ut.

---
 tests/ut/ge/single_op/single_op_model_unittest.cc | 33 ++++++++++++++++++-----
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc
index a2c1cb02..1cb2b22c 100644
--- a/tests/ut/ge/single_op/single_op_model_unittest.cc
+++ b/tests/ut/ge/single_op/single_op_model_unittest.cc
@@ -17,12 +17,11 @@
 #include <gtest/gtest.h>
 #include <vector>
 
+#define protected public
+#define private public
 #include "graph/load/model_manager/model_utils.h"
 #include "graph/utils/graph_utils.h"
 #include "runtime/rt.h"
-
-#define protected public
-#define private public
 #include "single_op/single_op_model.h"
 #include "single_op/task/tbe_task_builder.h"
 #include "single_op/task/rts_kernel_task_builder.h"
@@ -30,14 +29,18 @@
 #include "framework/common/helper/model_helper.h"
 #include "single_op/single_op.h"
 #include "single_op/stream_resource.h"
+#include "graph/passes/graph_builder_utils.h"
 #undef private
 #undef protected
-#include "graph/passes/graph_builder_utils.h"
 
 using namespace std;
 using namespace testing;
 using namespace ge;
 
+namespace {
+constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
+}  // namespace
+
 class UtestSingleOpModel : public testing::Test {
  protected:
   void SetUp() {}
@@ -208,12 +211,28 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) {
   model.model_helper_.model_ = ge::MakeShared<ge::GeModel>();
 
   // make graph
-  auto compute_graph = make_shared<ComputeGraph>("graph");
-  auto data_op = make_shared<OpDesc>("Data", DATA);
-  auto data_node = compute_graph->AddNode(data_op);
+  ut::GraphBuilder builder = ut::GraphBuilder("graph");
+  auto data = builder.AddNode("Data", "Data", 0, 1);
+  auto transdata = builder.AddNode("Transdata", "Transdata", 1, 1);
+  auto netoutput = builder.AddNode("Netoutput", "NetOutput", 1, 0);
+  builder.AddDataEdge(data, 0, transdata, 0);
+  builder.AddDataEdge(transdata, 0, netoutput, 0);
+  auto compute_graph = builder.GetGraph();
+
   auto graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph);
   model.model_helper_.model_->SetGraph(graph);
 
+  auto op_desc = transdata->GetOpDesc();
+  op_desc->input_name_idx_["Data"] = 0;
+  const vector<string> depend_names = { "Data" };
+  op_desc->SetOpInferDepends(depend_names);
+  (void)AttrUtils::SetBool(op_desc, kAttrSupportDynamicShape, true);
+
+  auto tensor = std::make_shared<GeTensor>();
+  auto data_desc = data->GetOpDesc();
+  auto tensor_desc = data_desc->MutableInputDesc(0);
+  AttrUtils::SetTensor(tensor_desc, "_value", tensor);
+
   // set task_def
   auto model_task_def = make_shared<domi::ModelTaskDef>();
   domi::TaskDef *task_def = model_task_def->add_task();
From 1ab9ae32dc4520be393242297ce900beeb9d2564 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Tue, 15 Jun 2021 10:00:19 +0800
Subject: [PATCH 06/51] Add ut.

---
 ge/single_op/single_op_model.cc | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 3c0f7972..4a7638b1 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -48,7 +48,7 @@ const uint32_t kInputIndexOfData = 0;
 const uint32_t kOutputIndexOfData = 0;
 constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 
-Status CheckHostMem(const std::vector<std::string> &dependencies, const NodePtr &node, bool &flag) {
+Status CheckHostMem(const std::vector<std::string> &dependencies, const NodePtr &node, bool &is_host_mem) {
   for (const auto &input_name : dependencies) {
     auto op_desc = node->GetOpDesc();
     int input_index = op_desc->GetInputIndexByName(input_name);
@@ -75,14 +75,14 @@ Status CheckHostMem(const std::vector<std::string> &dependencies, const NodePtr &node
         continue;
       }
     }
-    flag = false;
+    is_host_mem = false;
     return SUCCESS;
   }
-  flag = true;
+  is_host_mem = true;
   return SUCCESS;
 }
 
-Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
+Status CheckInferDepend(GeModelPtr &ge_model, bool &is_infer_depend, bool &is_host_mem) {
   auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
   GE_CHECK_NOTNULL(comp_graph);
   for (const auto &node : comp_graph->GetAllNodes()) {
@@ -93,16 +93,18 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
     bool support_dynamic_shape = false;
     (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
     if (!depends.empty() && support_dynamic_shape) {
-      flag = true;
-      return SUCCESS;
+      is_infer_depend = true;
+      return CheckHostMem(depends, node, is_host_mem);
     }
   }
   return SUCCESS;
 }
 
 Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) {
-  bool infer_depend_flag = false;
-  GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
+  bool is_infer_depend = false;
+  bool is_host_mem = false;
+  GE_CHK_STATUS_RET(CheckInferDepend(ge_model, is_infer_depend, is_host_mem), "[Check][InferDepend] failed.");
+  bool need_d2h_cpy = is_infer_depend && !is_host_mem;
   auto tasks = ge_model->GetModelTaskDefPtr()->task();
   int32_t kernel_task_num = 0;
   for (int i = 0; i < tasks.size(); ++i) {
@@ -112,7 +114,7 @@ Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) {
                                                  tasks[i].kernel_with_handle().context();
       auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
       if (kernel_type == ccKernelType::TE) {
-        if (infer_depend_flag) {
+        if (need_d2h_cpy) {
           flag = true;
           return SUCCESS;
         }
@@ -553,7 +555,8 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
   auto ge_model = model_helper_.GetGeModel();
   GE_CHECK_NOTNULL(ge_model);
   bool infer_depend_flag = false;
-  GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
+  bool is_host_mem = false;
+  GE_CHK_STATUS_RET(CheckInferDepend(ge_model, infer_depend_flag, is_host_mem)), "[Check][InferDepend] failed.");
   if (infer_depend_flag) {
     // construct single_op, do single op with HybridModelExecutor
     GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor.");
From b35412f5eaa40705adc2bdd014d62ebc32a0f898 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Tue, 15 Jun 2021 10:07:43 +0800
Subject:
[PATCH 07/51] Add ut. --- ge/single_op/single_op_model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 4a7638b1..182d1466 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -556,7 +556,7 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { GE_CHECK_NOTNULL(ge_model); bool infer_depend_flag = false; bool is_host_mem = false; - GE_CHK_STATUS_RET(CheckInferDepend(ge_model, infer_depend_flag, is_host_mem)), "[Check][InferDepend] failed."); + GE_CHK_STATUS_RET(CheckInferDepend(ge_model, infer_depend_flag, is_host_mem), "[Check][InferDepend] failed."); if (infer_depend_flag) { // construct single_op, do single op with HybridModelExecutor GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor."); From e85bbe218143a8e02ab17884da223447a11a440e Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 12 Jun 2021 12:00:01 +0800 Subject: [PATCH 08/51] Fix dynamic shape partition --- ge/graph/common/omg_util.cc | 15 -- ge/graph/common/omg_util.h | 9 - ge/graph/partition/dynamic_shape_partition.cc | 23 ++- ge/graph/partition/dynamic_shape_partition.h | 2 +- .../passes/mark_force_unknown_for_cond_pass.cc | 38 +--- ge/graph/passes/mark_graph_unknown_status_pass.cc | 6 + ge/graph/passes/merge_to_stream_merge_pass.cc | 5 +- ge/graph/passes/next_iteration_pass.cc | 10 +- ge/graph/passes/switch_to_stream_switch_pass.cc | 16 +- ge/hybrid/executor/node_state.cc | 57 +++++- ge/hybrid/executor/node_state.h | 4 + ge/hybrid/executor/subgraph_context.cc | 2 +- ge/hybrid/executor/subgraph_context.h | 4 +- ge/hybrid/executor/subgraph_executor.cc | 18 +- ge/hybrid/executor/subgraph_executor.h | 1 - ge/hybrid/model/node_item.cc | 5 +- ge/hybrid/model/node_item.h | 5 +- ge/hybrid/node_executor/task_context.cc | 17 +- ge/hybrid/node_executor/task_context.h | 4 +- .../partition/dynamic_shape_partition_unittest.cc | 194 ++++++++++++++++----- .../executor/worker/execution_engine_unittest.cc | 8 +- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 8 +- .../ge_local/ge_local_node_executor_unittest.cc | 5 - .../node_executor/rts/rts_node_task_unittest.cc | 40 ----- 24 files changed, 283 insertions(+), 213 deletions(-) diff --git a/ge/graph/common/omg_util.cc b/ge/graph/common/omg_util.cc index 52e6cb9c..b2017e4d 100644 --- a/ge/graph/common/omg_util.cc +++ b/ge/graph/common/omg_util.cc @@ -275,21 +275,6 @@ bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc) { } /// -/// @brief Set Op _force_unknown_shape flag -/// @param [in] node -/// @param [in] force_unknown, set attribute if true -/// @param [in] group_index, condition group index of node. -/// @return -/// -void MarkForceUnknownShape(const NodePtr &node, bool force_unknown, int64_t group_index) { - if (!force_unknown) { - return; - } - - SetControlFlowGroup(node, group_index); -} - -/// /// @brief Set Op _control_flow_group flag /// @param [in] node /// @param [in] group, condition group index of node. 
diff --git a/ge/graph/common/omg_util.h b/ge/graph/common/omg_util.h
index 148e4102..edaafa45 100644
--- a/ge/graph/common/omg_util.h
+++ b/ge/graph/common/omg_util.h
@@ -126,15 +126,6 @@ Status GetMemorySize(const NodePtr &node, int64_t &output_size);
 bool IsUnknownShapeTensor(const GeTensorDesc &tensor_desc);
 
 ///
-/// @brief Set Op _force_unknown_shape flag
-/// @param [in] node
-/// @param [in] force_unknown, set attribute if true
-/// @param [in] group_index, condition group index of node.
-/// @return
-///
-void MarkForceUnknownShape(const NodePtr &node, bool force_unknown, int64_t group_index);
-
-///
 /// @brief Set Op _control_flow_group flag
 /// @param [in] node
 /// @param [in] group, condition group index of node.
diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc
index 055b2aa4..1db47498 100755
--- a/ge/graph/partition/dynamic_shape_partition.cc
+++ b/ge/graph/partition/dynamic_shape_partition.cc
@@ -364,6 +364,7 @@ static std::string ToString(const std::vector<ClusterPtr> &clusters) {
 }
 
 void DynamicShapePartitioner::MergeClustersControlFlow() {
+  std::unordered_set<ClusterPtr> all_merged_clusters;
   for (const auto &item : control_clusters_) {
     const auto &control_cluster = item.second;
     auto rit = control_cluster.rbegin();
@@ -373,17 +374,32 @@ void DynamicShapePartitioner::MergeClustersControlFlow() {
     }
 
     const auto &cluster = *rit;
+    if (all_merged_clusters.count(cluster) > 0) {
+      continue;
+    }
+
+    bool is_unknown_cluster = cluster->IsUnknownShape();
     for (++rit; rit != control_cluster.rend(); ++rit) {
       const auto &cluster_from = *rit;
+      if (all_merged_clusters.count(cluster_from) > 0) {
+        continue;
+      }
+
       auto merged_clusters = cluster->MergeAllPathFrom(cluster_from);
       GELOGD("Merge all path cluster from %lu to %lu %s.", cluster_from->Id(), cluster->Id(),
             ToString(merged_clusters).c_str());
       for (const auto &merged_cluster : merged_clusters) {
+        all_merged_clusters.emplace(merged_cluster);
        for (const auto &node : merged_cluster->Nodes()) {
          node_2_cluster_[node] = cluster;
        }
      }
    }
+
+    if (!is_unknown_cluster && cluster->IsUnknownShape()) {
+      GELOGD("Add to ordered cluster: %s", cluster->DebugString().c_str());
+      ordered_cluster_.push_back(cluster);
+    }
  }
 }
 
@@ -703,7 +719,12 @@ void Cluster::Merge(ClusterPtr other) {
   if (other->min_ < min_) {
     min_ = other->min_;
   }
-};
+
+  if (!IsUnknownShape() && other->IsUnknownShape()) {
+    type_ = UNKNOWN_SHAPE;
+  }
+}
+
 bool Cluster::TryMerge(ClusterPtr other) {
   std::queue<ClusterPtr> forward_reached;
   forward_reached.push(other);
diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h
index a17c4e4b..bd3b128f 100644
--- a/ge/graph/partition/dynamic_shape_partition.h
+++ b/ge/graph/partition/dynamic_shape_partition.h
@@ -161,7 +161,7 @@ class DynamicShapePartitioner {
   ge::ComputeGraphPtr root_graph_;  // The original graph to partition
   std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_;  // Record nodes and the cluster it belongs to
   // V1 control flow cluster, need merge to one Graph.
-  std::unordered_map<int64_t, std::vector<std::shared_ptr<Cluster>>> control_clusters_;
+  std::map<int64_t, std::vector<std::shared_ptr<Cluster>>> control_clusters_;
   // topological sorted clusters, this field will change with the splitting.
   // When partitioning UNKNOWN_SHAPE cluster, it is a collection of all topological sorted UNKNOWN_SHAPE clusters
   // When partitioning KNOWN_SHAPE cluster, it is a collection of all topological sorted KNOWN_SHAPE clusters
diff --git a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc
index 08b358ee..74babadc 100644
--- a/ge/graph/passes/mark_force_unknown_for_cond_pass.cc
+++ b/ge/graph/passes/mark_force_unknown_for_cond_pass.cc
@@ -132,39 +132,17 @@ void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const NodePtr &node, std:
 /// @return
 ///
 void MarkForceUnknownForCondPass::MarkUnknownForSwitch(const std::map<NodePtr, std::vector<NodePtr>> &switch_groups) {
-  std::function<bool(const NodePtr &)> callback = [](const NodePtr &n) {
-    return n->GetOpDesc()->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP);
-  };
-
-  for (auto it1 = switch_groups.begin(); it1 != switch_groups.end(); ++it1) {
-    const auto &op_node1 = it1->first;
-    const auto &op_desc1 = op_node1->GetOpDesc();
-    if (op_desc1->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) {
+  for (auto it = switch_groups.begin(); it != switch_groups.end(); ++it) {
+    const auto &op_node = it->first;
+    const auto &op_desc = op_node->GetOpDesc();
+    if (op_desc->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) {
       continue;
     }
 
-    if (IsUnknownShapeTensor(op_desc1->GetOutputDesc(0))) {
-      int64_t group_index = op_desc1->GetId();
-      GELOGI("Mark %s as unknown shape control flow, group index: %ld", op_desc1->GetName().c_str(), group_index);
-      MarkForceUnknownShape(op_node1, true, group_index);
-      for (const auto &n : it1->second) {
-        MarkForceUnknownShape(n, true, group_index);
-      }
-
-      for (auto it2 = switch_groups.begin(); it2 != switch_groups.end(); ++it2) {
-        const auto &op_node2 = it2->first;
-        const auto &op_desc2 = op_node2->GetOpDesc();
-        if (op_desc2->HasAttr(ATTR_NAME_CONTROL_FLOW_GROUP)) {
-          continue;
-        }
-
-        if (std::any_of(it2->second.begin(), it2->second.end(), callback)) {
-          MarkForceUnknownShape(op_node2, true, group_index);
-          for (const auto &n : it2->second) {
-            MarkForceUnknownShape(n, true, group_index);
-          }
-        }
-      }
+    int64_t group_index = op_desc->GetId();
+    SetControlFlowGroup(op_node, group_index);
+    for (const auto &n : it->second) {
+      SetControlFlowGroup(n, group_index);
     }
   }
 }
diff --git a/ge/graph/passes/mark_graph_unknown_status_pass.cc b/ge/graph/passes/mark_graph_unknown_status_pass.cc
index 2d7b179b..9e460fc7 100644
--- a/ge/graph/passes/mark_graph_unknown_status_pass.cc
+++ b/ge/graph/passes/mark_graph_unknown_status_pass.cc
@@ -40,6 +40,12 @@ Status MarkGraphUnknownStatusPass::Run(ComputeGraphPtr graph) {
     }
   }
 
+  const auto &node = graph->GetParentNode();
+  if (!is_unknown_shape && node != nullptr && node->GetType() == PARTITIONEDCALL) {
+    GE_CHK_GRAPH_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_unknown_shape),
+                            "[Get][ShapeStatus] of node[%s] failed!", node->GetName().c_str());
+  }
+
   for (const auto &node : graph->GetDirectNode()) {
     GELOGD("Set OwnerGraphIsUnknown attr to node[%s]", node->GetName().c_str());
     (void)AttrUtils::SetBool(node->GetOpDesc(), kOwnerGraphIsUnknown, is_unknown_shape);
diff --git a/ge/graph/passes/merge_to_stream_merge_pass.cc b/ge/graph/passes/merge_to_stream_merge_pass.cc
index 0b383911..dbcff620 100644
--- a/ge/graph/passes/merge_to_stream_merge_pass.cc
+++ b/ge/graph/passes/merge_to_stream_merge_pass.cc
@@ -89,8 +89,7 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons
                    REPORT_INNER_ERROR("E19999", "Param node is nullptr, check invalid");
                    return FAILED, "[Check][Param] Param of pre node is nullptr.");
   int64_t group_index = -1;
-  bool force_unknown = AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index);
-  MarkForceUnknownShape(node, force_unknown, group_index);
+  (void)AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index);
   for (const InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
     OutDataAnchorPtr peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
    GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
@@ -109,7 +108,7 @@ Status MergeToStreamMergePass::AddActiveNodes(const ComputeGraphPtr &graph, cons
       GELOGE(FAILED, "[Set][ActiveLabelList] for node %s failed.", active_node->GetName().c_str());
       return FAILED;
     }
-    MarkForceUnknownShape(active_node, force_unknown, group_index);
+    SetControlFlowGroup(active_node, group_index);
   }
 
   return SUCCESS;
diff --git a/ge/graph/passes/next_iteration_pass.cc b/ge/graph/passes/next_iteration_pass.cc
index 67735b8b..fb8f8627 100644
--- a/ge/graph/passes/next_iteration_pass.cc
+++ b/ge/graph/passes/next_iteration_pass.cc
@@ -284,13 +284,21 @@ Status NextIterationPass::HandleWhileGroup(ComputeGraphPtr &graph) {
 /// @return void
 ///
 void NextIterationPass::HandleSwitchExitNodes(const LoopCondGroup &loop_group, int64_t group_index) {
+  std::string node_type;
   for (const auto &switch_node : loop_group.switch_nodes) {
     SetControlFlowGroup(switch_node, group_index);
     for (const auto &node : switch_node->GetOutDataNodes()) {
-      std::string node_type;
       (void)GetOriginalType(node, node_type);
       if (kExitOpTypes.count(node_type) > 0) {
         SetControlFlowGroup(node, group_index);
+      } else {
+        // For: Switch -> Cast -> Exit
+        for (const auto &n : node->GetOutDataNodes()) {
+          (void)GetOriginalType(n, node_type);
+          if (kExitOpTypes.count(node_type) > 0) {
+            SetControlFlowGroup(n, group_index);
+          }
+        }
       }
     }
   }
 }
diff --git a/ge/graph/passes/switch_to_stream_switch_pass.cc b/ge/graph/passes/switch_to_stream_switch_pass.cc
index e7743130..e4ab0111 100644
--- a/ge/graph/passes/switch_to_stream_switch_pass.cc
+++ b/ge/graph/passes/switch_to_stream_switch_pass.cc
@@ -395,8 +395,8 @@ NodePtr SwitchToStreamSwitchPass::CreateStreamSwitchNode(const ComputeGraphPtr &
          peer_cond_anchor->GetOwnerNode()->GetName().c_str(), stream_switch->GetName().c_str());
 
   int64_t group_index = -1;
-  bool force_unknown = AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index);
-  MarkForceUnknownShape(stream_switch, force_unknown, group_index);
+  (void)AttrUtils::GetInt(switch_node->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index);
+  SetControlFlowGroup(stream_switch, group_index);
   return stream_switch;
 }
 
@@ -491,8 +491,8 @@ int64_t SwitchToStreamSwitchPass::GetGroupId(const NodePtr &node) {
 Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph) {
   for (auto iter = cond_node_map_.begin(); iter != cond_node_map_.end(); ++iter) {
     for (auto group_iter = iter->second.begin(); group_iter != iter->second.end(); ++group_iter) {
-      std::list<NodePtr> false_switch_list = group_iter->second[SWITCH_FALSE_OUTPUT];
-      std::list<NodePtr> true_switch_list = group_iter->second[SWITCH_TRUE_OUTPUT];
+      const std::list<NodePtr> &false_switch_list = group_iter->second[SWITCH_FALSE_OUTPUT];
+      const std::list<NodePtr> &true_switch_list = group_iter->second[SWITCH_TRUE_OUTPUT];
       std::set<NodePtr> same_cond_switch;
       same_cond_switch.insert(false_switch_list.begin(), false_switch_list.end());
       same_cond_switch.insert(true_switch_list.begin(), true_switch_list.end());
@@ -524,13 +524,13 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)
       std::function<bool(const NodePtr &)> callback = [&group_index](const NodePtr &n) {
         return AttrUtils::GetInt(n->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, group_index);
       };
-      bool is_unknown_shape = std::any_of(same_cond_switch.begin(), same_cond_switch.end(), callback);
-      MarkForceUnknownShape(active_node, is_unknown_shape, group_index);
+      (void)std::any_of(same_cond_switch.begin(), same_cond_switch.end(), callback);
+      SetControlFlowGroup(active_node, group_index);
 
       const std::string &cond_group = cond_node->GetName();
       for (uint32_t i = 0; i < SWITCH_OUTPUT_NUM; ++i) {
         bool true_branch_flag = (i == SWITCH_TRUE_OUTPUT);
-        std::list<NodePtr> &switch_list = (true_branch_flag ? true_switch_list : false_switch_list);
+        const std::list<NodePtr> &switch_list = (true_branch_flag ? true_switch_list : false_switch_list);
         GE_IF_BOOL_EXEC(switch_list.empty(), continue);
 
         // select first stream_switch
@@ -559,7 +559,7 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)
                       "[Add][Edge] between %s and %s failed.",
                       cast_node->GetName().c_str(), stream_switch->GetName().c_str());
 
-        MarkForceUnknownShape(stream_switch, is_unknown_shape, group_index);
+        SetControlFlowGroup(stream_switch, group_index);
         for (const NodePtr &node : switch_list) {
           GE_IF_BOOL_EXEC(node != stream_switch, {
             GE_CHK_STATUS(GraphUtils::RemoveEdge(peer_cond_anchor, node->GetInDataAnchor(0)),
diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc
index 313a2934..42e08811 100644
--- a/ge/hybrid/executor/node_state.cc
+++ b/ge/hybrid/executor/node_state.cc
@@ -19,8 +19,9 @@
 #include "framework/common/debug/log.h"
 #include "graph/compute_graph.h"
 #include "graph/utils/tensor_utils.h"
-#include "hybrid_execution_context.h"
-#include "subgraph_context.h"
+#include "hybrid/executor/hybrid_execution_context.h"
+#include "hybrid/executor/subgraph_context.h"
+#include "hybrid/node_executor/task_context.h"
 
 #define INC_ITERATION_COUNT(iteration) \
   do { \
@@ -258,6 +259,8 @@ ShapeFuture::ShapeFuture(NodeState *src_node,
 NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context)
     : node_item_(&node_item), shape_inference_state_(node_item), subgraph_context_(subgraph_context) {
   this->op_desc_ = node_item.node->GetOpDesc();
+  auto unique_task_context = TaskContext::Create(this, subgraph_context_);
+  task_context_ = std::shared_ptr<TaskContext>(unique_task_context.release());
 }
 
 Status NodeState::AwaitInputTensors(GraphExecutionContext &context) const {
@@ -314,15 +317,53 @@ std::shared_ptr<TaskContext> NodeState::GetTaskContext() {
   return task_context_;
 }
 
+void NodeState::SavePersistTensor(int input_idx, const TensorValue &tensor) {
+  if (node_item_->root_data_.count(input_idx) > 0) {
+    GELOGD("[%s] Save Root input tensor: %d", GetName().c_str(), input_idx);
+    root_tensor_values_[input_idx] = tensor;
+  }
+
+  if (node_item_->enter_data_.count(input_idx) > 0) {
+    GELOGD("[%s] Save Enter input tensor: %d", GetName().c_str(), input_idx);
+    root_tensor_values_[input_idx] = tensor;
+  }
+}
+
+void NodeState::UpdatePersistTensor(int input_idx) {
+  const auto it = root_tensor_values_.find(input_idx);
+  if (it == root_tensor_values_.end()) {
+    GELOGW("[%s] Not found saved tensor: %d", GetName().c_str(), input_idx);
+    return;
+  }
+
+  auto tensor = task_context_->MutableInput(input_idx);
+  if (tensor == nullptr) {
+    GELOGW("[%s] Not found input tensor: %d", GetName().c_str(), input_idx);
+    return;
+  }
+
+  *tensor = it->second;
+  GELOGD("[%s] Update input tensor: %d",
GetName().c_str(), input_idx); +} + void NodeState::ResetContext(uint64_t iteration) { switch_index_ = -1; subgraph_context_->ResetContext(node_item_->node); - if (iteration == 0) { - data_scheduled_ = static_cast(node_item_->root_data_.size()); - ctrl_scheduled_ = static_cast(node_item_->root_ctrl_.size()); - } else { - data_scheduled_ = static_cast(node_item_->root_data_.size() + node_item_->enter_data_.size()); - ctrl_scheduled_ = static_cast(node_item_->root_ctrl_.size() + node_item_->enter_ctrl_.size()); + auto unique_task_context = TaskContext::Create(this, subgraph_context_); + task_context_ = std::shared_ptr(unique_task_context.release()); + + data_scheduled_ = static_cast(node_item_->root_data_.size()); + ctrl_scheduled_ = static_cast(node_item_->root_ctrl_.size()); + for (auto item : node_item_->root_data_) { + UpdatePersistTensor(item.first); + } + + if (iteration > 0) { + data_scheduled_ += static_cast(node_item_->enter_data_.size()); + ctrl_scheduled_ += static_cast(node_item_->enter_ctrl_.size()); + for (auto item : node_item_->enter_data_) { + UpdatePersistTensor(item.first); + } } iteration_count_ = iteration; diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 9dd29846..72e2b90e 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -129,6 +129,8 @@ struct NodeState { void RunStreamActive(); void RunNextIteration(); + void SavePersistTensor(int input_idx, const TensorValue &tensor); + Status NodeScheduled(const std::function &ready) const; void SetScheduleFuture(std::future &&future); @@ -187,6 +189,7 @@ struct NodeState { void SetCtrlSchedule(const NodeState &node_state, const std::function &ready); void ResetContext(uint64_t iteration); void ScheduleContext(const NodeState &node_state); + void UpdatePersistTensor(int input_idx); const NodeItem *node_item_ = nullptr; std::shared_ptr kernel_task_ = nullptr; @@ -199,6 +202,7 @@ struct NodeState { std::future schedule_future_; std::shared_ptr frame_state_; + std::map root_tensor_values_; uint64_t active_count_ = 0; uint64_t iteration_count_ = 0; uint32_t ctrl_scheduled_ = 0; diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index b6763ffd..41ada9af 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -19,7 +19,7 @@ namespace ge { namespace hybrid { -SubgraphContext::SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context) +SubgraphContext::SubgraphContext(const GraphItem *graph_item, GraphExecutionContext *execution_context) : graph_item_(graph_item), execution_context_(execution_context) { } diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index a43cd210..d11d00d7 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -30,7 +30,7 @@ namespace ge { namespace hybrid { class SubgraphContext { public: - explicit SubgraphContext(const GraphItem *graph_item, const GraphExecutionContext *execution_context); + explicit SubgraphContext(const GraphItem *graph_item, GraphExecutionContext *execution_context); ~SubgraphContext(); Status Init(); @@ -54,7 +54,7 @@ class SubgraphContext { FrameStatePtr GetOrCreateFrameState(const NodeItem &node_item); // no lock friend class TaskContext; const GraphItem *graph_item_; - const GraphExecutionContext *execution_context_; + GraphExecutionContext *execution_context_; mmRWLock_t rw_lock_; std::vector all_inputs_; std::vector 
all_outputs_; diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 612e7565..7429acc5 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -175,16 +175,12 @@ Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vectorSetKernelTask(node_item->kernel_task); - known_shape_task_context_ = TaskContext::Create(node_state.get(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(known_shape_task_context_); - node_state->SetTaskContext(known_shape_task_context_); - std::function callback; GE_CHK_STATUS_RET_NOLOG(InitCallback(node_state.get(), callback)); - HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, known_shape_task_context_, *context_, callback), + HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, node_state->GetTaskContext(), *context_, callback), "[%s] Failed to execute node [%s] for known subgraph.", graph_item_->GetName().c_str(), - known_shape_task_context_->GetNodeName()); + node_state->GetName().c_str()); GELOGD("[%s] Done execute non-dynamic subgraph successfully.", graph_item_->GetName().c_str()); return SUCCESS; @@ -271,16 +267,12 @@ Status SubgraphExecutor::PrepareNode(const NodeItem &node_item, int group) { } else { node_state->SetKernelTask(node_item.kernel_task); } - auto unique_task_context = TaskContext::Create(node_state.get(), context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(unique_task_context); const auto &task = node_state->GetKernelTask(); if (task == nullptr) { GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str()); REPORT_CALL_ERROR("E19999", "GetKernelTask failed for %s, nodetask is null.", node_state->GetName().c_str()); return INTERNAL_ERROR; } - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); GE_CHK_STATUS_RET_NOLOG(NodeEnqueue(p_node_state)); return AfterPrepared(p_node_state); } @@ -480,19 +472,15 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta } else { node_state.SetKernelTask(node_item.kernel_task); } - auto unique_task_context = TaskContext::Create(&node_state, context_, subgraph_context_.get()); - GE_CHECK_NOTNULL(unique_task_context); const auto &task = node_state.GetKernelTask(); if (task == nullptr) { GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for[%s], NodeTask is null.", node_state.GetName().c_str()); REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null.", node_state.GetName().c_str()); return INTERNAL_ERROR; } - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state.SetTaskContext(shared_task_context); GE_CHK_RT_RET(rtCtxSetCurrent(ctx->rt_context)); RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] start"); - GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*shared_task_context)); // update op_desc before alloc ws + GE_CHK_STATUS_RET_NOLOG(task->UpdateTilingData(*node_state.GetTaskContext())); // update op_desc before alloc ws RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[UpdateTilingData] end"); return SUCCESS; } diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 758bf426..e4c0debe 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -125,7 +125,6 @@ class SubgraphExecutor { ThreadPool pre_run_pool_; BlockingQueue ready_queue_; 
std::unique_ptr shape_inference_engine_; - std::shared_ptr known_shape_task_context_; std::mutex mu_; // Guard for prepare_queues_. std::map> prepare_queues_; diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index b339e630..cef06fc6 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -398,12 +398,11 @@ void NodeItem::SetDataSend(NodeItem *node_item, int anchor_index) { data_send_.emplace(node_item); node_item->data_recv_[this] = anchor_index; if (is_root_node_) { - node_item->root_data_.emplace(this); + node_item->root_data_[anchor_index] = this; } // If Enter feed Not Merge, take as root Node. if (IsEnterOp() && (node_item->node_type != STREAMMERGE)) { - node_item->enter_data_.emplace(this); - node_item->enter_inside_.emplace(anchor_index); + node_item->enter_data_[anchor_index] = this; } GELOGI("Node[%s] will control node[%s]", NodeName().c_str(), node_item->NodeName().c_str()); } diff --git a/ge/hybrid/model/node_item.h b/ge/hybrid/model/node_item.h index 8de15952..ec66f094 100644 --- a/ge/hybrid/model/node_item.h +++ b/ge/hybrid/model/node_item.h @@ -148,15 +148,14 @@ struct NodeItem { int64_t frame_index_ = -1; int64_t parent_frame_ = -1; std::set root_ctrl_; // Recv ctrl from root node - std::set root_data_; // Recv data from root node + std::map root_data_; // Recv data from root node std::set enter_ctrl_; // Recv ctrl from Enter node - std::set enter_data_; // Recv data from Enter node + std::map enter_data_; // Recv data from Enter node std::set data_send_; // Send data notify to std::map data_recv_; // Recv data notify from std::set ctrl_send_; // Send ctrl notify to std::set ctrl_recv_; // Recv ctrl notify from std::vector> switch_groups_; // Send ctrl notify to - std::set enter_inside_; // Enter feed loop inside Node, Not cross Merge. 
std::shared_ptr kernel_task; std::unique_ptr fused_subgraph; diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index 14eb1222..fe580c1e 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -52,9 +52,7 @@ void TaskContext::ReleaseWorkspace() { } } -std::unique_ptr TaskContext::Create(NodeState *node_state, - GraphExecutionContext *execution_context, - SubgraphContext *subgraph_context) { +std::unique_ptr TaskContext::Create(NodeState *node_state, SubgraphContext *subgraph_context) { const NodeItem &node_item = *node_state->GetNodeItem(); GELOGI("[%s] To create task context, input start = %d, num_inputs = %d, output start = %d, num_outputs = %d.", node_item.NodeName().c_str(), @@ -75,7 +73,7 @@ std::unique_ptr TaskContext::Create(NodeState *node_state, } auto task_context = std::unique_ptr( - new(std::nothrow)TaskContext(execution_context, node_state, subgraph_context)); + new(std::nothrow)TaskContext(subgraph_context->execution_context_, node_state, subgraph_context)); if (task_context == nullptr) { REPORT_CALL_ERROR("E19999", "Create TaskContext failed for [%s].", node_item.NodeName().c_str()); GELOGE(MEMALLOC_FAILED, "[Create][TaskContext] failed for [%s].", node_item.NodeName().c_str()); @@ -85,7 +83,7 @@ std::unique_ptr TaskContext::Create(NodeState *node_state, task_context->node_item_ = &node_item; task_context->inputs_start_ = subgraph_context->all_inputs_.data() + node_item.input_start; task_context->outputs_start_ = subgraph_context->all_outputs_.data() + node_item.output_start; - task_context->iteration_ = execution_context->iteration; + task_context->iteration_ = subgraph_context->execution_context_->iteration; return task_context; } @@ -460,6 +458,10 @@ Status TaskContext::PropagateOutputs() { subgraph_context_->all_inputs_[input_offset].SetName( node_item_->NodeName() + "_in_" + std::to_string(dst_input_idx)); } + + auto dst_node_state = subgraph_context_->GetOrCreateNodeState(dst_node_item); + GE_CHECK_NOTNULL(dst_node_state); + dst_node_state->SavePersistTensor(dst_input_idx, *tensor); } } (void)guard; @@ -489,11 +491,6 @@ void TaskContext::ReleaseInputsAndOutputs() { } void TaskContext::ReleaseInput(int index) { - if (node_item_->enter_inside_.count(index) > 0) { - GELOGD("[%s] Tensor of input[%d] is enter, keep it", GetNodeName(), index); - return; - } - auto input_tensor = MutableInput(index); if (input_tensor != nullptr) { input_tensor->Destroy(); diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index ba4c62e6..c96e194e 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -36,9 +36,7 @@ class SubgraphContext; class TaskContext { public: - static std::unique_ptr Create(NodeState *node_state, - GraphExecutionContext *execution_context, - SubgraphContext *subgraph_context); + static std::unique_ptr Create(NodeState *node_state, SubgraphContext *subgraph_context); ~TaskContext(); diff --git a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc index ec1caebd..da1abd0f 100644 --- a/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc +++ b/tests/ut/ge/graph/partition/dynamic_shape_partition_unittest.cc @@ -24,6 +24,7 @@ #include "inc/framework/common/types.h" #include "utils/graph_utils.h" #include "graph/debug/ge_attr_define.h" +#include "graph/common/omg_util.h" namespace ge { namespace { @@ -38,33 +39,33 
@@ GeTensorDescPtr CreateTensorDesc(std::initializer_list shape, Format fo } class NodeBuilder { - public: - NodeBuilder(const std::string &name, const std::string &type) { op_desc_ = std::make_shared(name, type); } - - NodeBuilder &AddInputDesc(std::initializer_list shape = {1, 1, 224, 224}, Format format = FORMAT_NCHW, - DataType data_type = DT_FLOAT) { - op_desc_->AddInputDesc(CreateTensorDesc(shape, format, data_type)->Clone()); - return *this; - } - - NodeBuilder &AddOutputDesc(std::initializer_list shape = {1, 1, 224, 224}, Format format = FORMAT_NCHW, - DataType data_type = DT_FLOAT) { - op_desc_->AddOutputDesc(CreateTensorDesc(shape, format, data_type)->Clone()); - return *this; - } - - NodeBuilder &AddOutputDesc(GeTensorDescPtr tensor_desc) { - op_desc_->AddOutputDesc(tensor_desc->Clone()); - return *this; - } - - NodePtr Build(const ComputeGraphPtr &graph) { - NodePtr node = graph->AddNode(op_desc_); - return node; - } - - private: - OpDescPtr op_desc_; + public: + NodeBuilder(const std::string &name, const std::string &type) { op_desc_ = std::make_shared(name, type); } + + NodeBuilder &AddInputDesc(std::initializer_list shape = {1, 1, 224, 224}, Format format = FORMAT_NCHW, + DataType data_type = DT_FLOAT) { + op_desc_->AddInputDesc(CreateTensorDesc(shape, format, data_type)->Clone()); + return *this; + } + + NodeBuilder &AddOutputDesc(std::initializer_list shape = {1, 1, 224, 224}, Format format = FORMAT_NCHW, + DataType data_type = DT_FLOAT) { + op_desc_->AddOutputDesc(CreateTensorDesc(shape, format, data_type)->Clone()); + return *this; + } + + NodeBuilder &AddOutputDesc(GeTensorDescPtr tensor_desc) { + op_desc_->AddOutputDesc(tensor_desc->Clone()); + return *this; + } + + NodePtr Build(const ComputeGraphPtr &graph) { + NodePtr node = graph->AddNode(op_desc_); + return node; + } + + private: + OpDescPtr op_desc_; }; } // namespace @@ -93,28 +94,137 @@ TEST_F(UtestDynamicShapePartition, single_op_scene_success) { EXPECT_EQ(partitioner.Partition(), SUCCESS); } +/******************************************************************************* + * | + * Merge1 + * Active / \ Active + * / \. + * / \. + * Merge2 \. + * Active/ \Active \. + * / \ \. 
+ * Add Sub Relu + * | | | + * | | | + * Switch_f2 Switch_t2 | + * \ / | + * \ / | + * Less2 | + * | | + * | | + * Switch_f Switch_t + * | \ / | + * | Active | + * | | | + * | Less1 | + * | / \ | + * | / \ | + * Data Data + ******************************************************************************/ TEST_F(UtestDynamicShapePartition, merge_control_flow_group) { ComputeGraphPtr graph = std::make_shared("default"); AttrUtils::SetStr(*graph, ATTR_NAME_SESSION_GRAPH_ID, "session_graph_id"); - NodePtr data1 = NodeBuilder("data1", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); - NodePtr data2 = NodeBuilder("data2", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); - NodePtr merge = NodeBuilder("node2", MERGE).AddInputDesc({1}).AddInputDesc({1}) - .AddOutputDesc({1}).AddOutputDesc({}).Build(graph); - - GraphUtils::AddEdge(data1->GetOutDataAnchor(0), merge->GetInDataAnchor(0)); - GraphUtils::AddEdge(data2->GetOutDataAnchor(0), merge->GetInDataAnchor(1)); - - (void)AttrUtils::SetBool(data1->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); - (void)AttrUtils::SetInt(data1->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); - (void)AttrUtils::SetBool(data2->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); - (void)AttrUtils::SetInt(data2->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); - (void)AttrUtils::SetBool(merge->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); - (void)AttrUtils::SetInt(merge->GetOpDesc(), ATTR_NAME_CONTROL_FLOW_GROUP, 3); + auto data1 = NodeBuilder("data1", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto data2 = NodeBuilder("data2", DATA).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + + auto less1 = NodeBuilder("less1", LESS).AddInputDesc({1}).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto active1 = NodeBuilder("active1", STREAMACTIVE).Build(graph); + auto switch_t = NodeBuilder("switch_t", STREAMSWITCH).AddInputDesc({1}).AddInputDesc({1}).Build(graph); + auto switch_f = NodeBuilder("switch_f", STREAMSWITCH).AddInputDesc({1}).AddInputDesc({1}).Build(graph); + auto const_01 = NodeBuilder("const_01", CONSTANT).AddOutputDesc({1}).Build(graph); + auto const_11 = NodeBuilder("const_11", CONSTANT).AddOutputDesc({1}).Build(graph); + + + auto less2 = NodeBuilder("less2", LESS).AddInputDesc({1}).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto active2 = NodeBuilder("active2", STREAMACTIVE).Build(graph); + auto switch_t2 = NodeBuilder("switch_t2", STREAMSWITCH).AddInputDesc({1}).AddInputDesc({1}).Build(graph); + auto switch_f2 = NodeBuilder("switch_f2", STREAMSWITCH).AddInputDesc({1}).AddInputDesc({1}).Build(graph); + auto const_02 = NodeBuilder("const_02", CONSTANT).AddOutputDesc({1}).Build(graph); + auto const_12 = NodeBuilder("const_12", CONSTANT).AddOutputDesc({1}).Build(graph); + + auto add2 = NodeBuilder("add2", ADD).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto sub2 = NodeBuilder("sub2", SUB).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto merge2 = NodeBuilder("merge2", STREAMMERGE).AddInputDesc({1}).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto active_f2 = NodeBuilder("active_f2", STREAMACTIVE).Build(graph); + auto active_t2 = NodeBuilder("active_t2", STREAMACTIVE).Build(graph); + + auto relu1 = NodeBuilder("relu1", RELU).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto merge1 = NodeBuilder("merge1", STREAMMERGE).AddInputDesc({1}).AddInputDesc({1}).AddOutputDesc({1}).Build(graph); + auto active_f1 = NodeBuilder("active_f1", STREAMACTIVE).Build(graph); + auto active_t1 = 
NodeBuilder("active_t1", STREAMACTIVE).Build(graph); + + auto output1 = NodeBuilder("noutput1", NETOUTPUT).AddInputDesc({1}).Build(graph); + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), less1->GetInDataAnchor(0)); + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), less1->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch_t->GetInDataAnchor(0)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), switch_f->GetInDataAnchor(0)); + GraphUtils::AddEdge(const_01->GetOutDataAnchor(0), switch_t->GetInDataAnchor(1)); + GraphUtils::AddEdge(const_11->GetOutDataAnchor(0), switch_f->GetInDataAnchor(1)); + GraphUtils::AddEdge(less1->GetOutControlAnchor(), active1->GetInControlAnchor()); + GraphUtils::AddEdge(active1->GetOutControlAnchor(), switch_t->GetInControlAnchor()); + GraphUtils::AddEdge(active1->GetOutControlAnchor(), switch_f->GetInControlAnchor()); + + + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), less2->GetInDataAnchor(0)); + GraphUtils::AddEdge(less1->GetOutDataAnchor(0), less2->GetInDataAnchor(1)); + GraphUtils::AddEdge(less2->GetOutDataAnchor(0), switch_t2->GetInDataAnchor(0)); + GraphUtils::AddEdge(less2->GetOutDataAnchor(0), switch_f2->GetInDataAnchor(0)); + GraphUtils::AddEdge(const_02->GetOutDataAnchor(0), switch_t2->GetInDataAnchor(1)); + GraphUtils::AddEdge(const_12->GetOutDataAnchor(0), switch_f2->GetInDataAnchor(1)); + GraphUtils::AddEdge(less2->GetOutControlAnchor(), active2->GetInControlAnchor()); + GraphUtils::AddEdge(active2->GetOutControlAnchor(), switch_t2->GetInControlAnchor()); + GraphUtils::AddEdge(active2->GetOutControlAnchor(), switch_f2->GetInControlAnchor()); + + + GraphUtils::AddEdge(switch_f2->GetOutControlAnchor(), add2->GetInControlAnchor()); + GraphUtils::AddEdge(less2->GetOutDataAnchor(0), add2->GetInDataAnchor(0)); + GraphUtils::AddEdge(add2->GetOutDataAnchor(0), merge2->GetInDataAnchor(0)); + GraphUtils::AddEdge(add2->GetOutControlAnchor(), active_f2->GetInControlAnchor()); + GraphUtils::AddEdge(active_f2->GetOutControlAnchor(), merge2->GetInControlAnchor()); + + GraphUtils::AddEdge(switch_t2->GetOutControlAnchor(), sub2->GetInControlAnchor()); + GraphUtils::AddEdge(less2->GetOutDataAnchor(0), sub2->GetInDataAnchor(0)); + GraphUtils::AddEdge(sub2->GetOutDataAnchor(0), merge2->GetInDataAnchor(1)); + GraphUtils::AddEdge(sub2->GetOutControlAnchor(), active_t2->GetInControlAnchor()); + GraphUtils::AddEdge(active_t2->GetOutControlAnchor(), merge2->GetInControlAnchor()); + + GraphUtils::AddEdge(switch_t->GetOutControlAnchor(), less2->GetInControlAnchor()); + GraphUtils::AddEdge(switch_f->GetOutControlAnchor(), relu1->GetInControlAnchor()); + + + GraphUtils::AddEdge(merge2->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(merge2->GetOutControlAnchor(), active_f1->GetInControlAnchor()); + GraphUtils::AddEdge(active_f1->GetOutControlAnchor(), merge1->GetInControlAnchor()); + + GraphUtils::AddEdge(data2->GetOutDataAnchor(0), relu1->GetInDataAnchor(1)); + GraphUtils::AddEdge(relu1->GetOutDataAnchor(0), merge1->GetInDataAnchor(0)); + GraphUtils::AddEdge(relu1->GetOutControlAnchor(), active_t1->GetInControlAnchor()); + GraphUtils::AddEdge(active_t1->GetOutControlAnchor(), merge1->GetInControlAnchor()); + + GraphUtils::AddEdge(merge1->GetOutDataAnchor(0), output1->GetInDataAnchor(0)); + + AttrUtils::SetBool(merge2->GetOpDesc(), ATTR_NAME_FORCE_UNKNOWN_SHAPE, true); + EXPECT_EQ(graph->TopologicalSorting(), GRAPH_SUCCESS); + + SetControlFlowGroup(merge2, merge2->GetOpDesc()->GetId()); + SetControlFlowGroup(switch_f2, 
merge2->GetOpDesc()->GetId()); + SetControlFlowGroup(switch_t2, merge2->GetOpDesc()->GetId()); + SetControlFlowGroup(active2, merge2->GetOpDesc()->GetId()); + SetControlFlowGroup(active_t2, merge2->GetOpDesc()->GetId()); + SetControlFlowGroup(active_f2, merge2->GetOpDesc()->GetId()); + + SetControlFlowGroup(merge1, merge1->GetOpDesc()->GetId()); + SetControlFlowGroup(switch_f, merge1->GetOpDesc()->GetId()); + SetControlFlowGroup(switch_t, merge1->GetOpDesc()->GetId()); + SetControlFlowGroup(active1, merge1->GetOpDesc()->GetId()); + SetControlFlowGroup(active_f1, merge1->GetOpDesc()->GetId()); + SetControlFlowGroup(active_t1, merge1->GetOpDesc()->GetId()); EXPECT_EQ(graph->impl_->sub_graph_.size(), 0); DynamicShapePartitioner partitioner(graph); EXPECT_EQ(partitioner.Partition(), SUCCESS); - EXPECT_EQ(graph->impl_->sub_graph_.size(), 1); + EXPECT_EQ(graph->impl_->sub_graph_.size(), 3); // input less1 unknown } } // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index 07022230..e0ccbfa5 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -84,9 +84,6 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_kernel_task) { SubgraphContext subgraph_context(nullptr, &execution_context); NodeState node_state(*node_item, &subgraph_context); - auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); - auto shared_task_context = std::shared_ptr(task_context.release()); - node_state.SetTaskContext(shared_task_context); ExecutionEngine execution_engine; ASSERT_TRUE(node_state.GetTaskContext() != nullptr); @@ -119,14 +116,11 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { SubgraphContext subgraph_context(nullptr, &execution_context); NodeState node_state(*node_item, &subgraph_context); - auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); uint32_t task_id = 0; uint32_t stream_id = 1; std::string task_type = "rts"; uint32_t block_dim = 0; - task_context->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); - auto shared_task_context = std::shared_ptr(task_context.release()); - node_state.SetTaskContext(shared_task_context); + node_state.GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); ExecutionEngine execution_engine; ASSERT_TRUE(node_state.GetTaskContext() != nullptr); diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index f6c75d50..d634ed14 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -161,10 +161,8 @@ TEST_F(UtestGeHybrid, task_update_tiling_info) { GraphExecutionContext execution_context; SubgraphContext subgraph_context(nullptr, &execution_context); NodeState node_state(*node_item, &subgraph_context); - auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); - ASSERT_TRUE(task_context != nullptr); ASSERT_EQ(aicore_task->InitTilingInfo(*op_desc), SUCCESS); - ASSERT_EQ(aicore_task->UpdateTilingInfo(*task_context), SUCCESS); + ASSERT_EQ(aicore_task->UpdateTilingInfo(*node_state.GetTaskContext()), SUCCESS); } TEST_F(UtestGeHybrid, index_taskdefs_failed) { @@ -482,7 +480,7 @@ TEST_F(UtestGeHybrid, TestTaskContext) { subgraph_context.all_outputs_.resize(1); NodeState 
node_state(*node_item, &subgraph_context); - auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + auto task_context = node_state.GetTaskContext(); ASSERT_TRUE(task_context != nullptr); auto desc = task_context->MutableInputDesc(2); ASSERT_TRUE(desc == nullptr); @@ -527,7 +525,7 @@ TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) { subgraph_context.all_outputs_.resize(1); NodeState node_state(*node_item, &subgraph_context); - auto task_context = TaskContext::Create(&node_state, &execution_context, &subgraph_context); + auto task_context = node_state.GetTaskContext(); int32_t buffer[1]; aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer)); diff --git a/tests/ut/ge/hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc index a7a407a4..e4d211f9 100644 --- a/tests/ut/ge/hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc +++ b/tests/ut/ge/hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc @@ -97,11 +97,6 @@ TEST_F(UtestGeLocalNodeExecutor, test_no_op_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - NodeTaskPtr task = nullptr; GeLocalNodeExecutor node_executor; ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); diff --git a/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc b/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc index 44b2f37f..109e5192 100644 --- a/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc +++ b/tests/ut/ge/hybrid/node_executor/rts/rts_node_task_unittest.cc @@ -96,11 +96,6 @@ TEST_F(UtestRtsNodeTask, test_stream_switch_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - uint64_t value_0 = 110; uint64_t value_1 = 120; TensorValue in_tensor0(&value_0, sizeof(value_0)); @@ -153,11 +148,6 @@ TEST_F(UtestRtsNodeTask, test_stream_active_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - NodeTaskPtr task = nullptr; RtsNodeExecutor node_executor; ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); @@ -203,11 +193,6 @@ TEST_F(UtestRtsNodeTask, test_stream_merge_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - 
uint64_t value_0 = 110; TensorValue in_tensor0(&value_0, sizeof(value_0)); subgraph_context.SetInput(*node_item, 0, in_tensor0); @@ -271,11 +256,6 @@ TEST_F(UtestRtsNodeTask, test_memcpy_async_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - uint64_t value_0 = 110; TensorValue in_tensor0(&value_0, sizeof(value_0)); subgraph_context.SetInput(*node_item, 0, in_tensor0); @@ -328,11 +308,6 @@ TEST_F(UtestRtsNodeTask, test_pass_through_task) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - uint64_t value_0 = 110; TensorValue in_tensor0(&value_0, sizeof(value_0)); subgraph_context.SetInput(*node_item, 0, in_tensor0); @@ -384,11 +359,6 @@ TEST_F(UtestRtsNodeTask, test_unsupport_label_set) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - NodeTaskPtr task = nullptr; RtsNodeExecutor node_executor; ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); @@ -428,11 +398,6 @@ TEST_F(UtestRtsNodeTask, test_unsupport_label_goto) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - NodeTaskPtr task = nullptr; RtsNodeExecutor node_executor; ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); @@ -472,11 +437,6 @@ TEST_F(UtestRtsNodeTask, test_unsupport_label_switch) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - NodeTaskPtr task = nullptr; RtsNodeExecutor node_executor; ASSERT_EQ(node_executor.LoadTask(hybrid_model, node, task), SUCCESS); From 8852766766ec531f47227d237706b04fc53dff8d Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 12 Jun 2021 13:16:42 +0800 Subject: [PATCH 09/51] Fix hccl_node_executor_unittest --- .../node_executor/hccl/hccl_node_executor_unittest.cc | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc index afaf067e..8e6630f6 100644 --- 
a/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc +++ b/tests/ut/ge/hybrid/node_executor/hccl/hccl_node_executor_unittest.cc @@ -94,18 +94,17 @@ TEST_F(UtestHcclNodeExecutor, test_rdmatask_extract_tensor) { tensor.SetData(data); ctx->SetTensor(1, 0, tensor.Clone()); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); vector addr_infos; shared_ptr task = MakeShared(); task->remote_index_ = {1, 0}; - ASSERT_EQ(task->ExtractTensor(*unique_task_context, addr_infos), PARAM_INVALID); + ASSERT_EQ(task->ExtractTensor(*node_state->GetTaskContext(), addr_infos), PARAM_INVALID); Shape s2({1}); TensorDesc tensor_desc2(s2); Tensor tensor2(tensor_desc2); ctx->SetTensor(1, 0, tensor2.Clone()); - task->ExtractTensor(*unique_task_context, addr_infos); - ASSERT_EQ(task->ExtractTensor(*unique_task_context, addr_infos), PARAM_INVALID); + task->ExtractTensor(*node_state->GetTaskContext(), addr_infos); + ASSERT_EQ(task->ExtractTensor(*node_state->GetTaskContext(), addr_infos), PARAM_INVALID); RuntimeInferenceContext::DestroyContext(std::to_string(graph_context.context_id)); } @@ -140,11 +139,6 @@ TEST_F(UtestHcclNodeExecutor, gatheralltoallv_execute) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - for (int i=0; i<4; ++i) { uint64_t value_0 = 512; TensorValue in_tensor0(&value_0, sizeof(value_0)); @@ -206,11 +200,6 @@ TEST_F(UtestHcclNodeExecutor, alltoallv_execute) { auto node_state = subgraph_context.GetOrCreateNodeState(node_item); ASSERT_NE(node_state, nullptr); - auto unique_task_context = TaskContext::Create(node_state.get(), &graph_context, &subgraph_context); - ASSERT_NE(unique_task_context, nullptr); - auto shared_task_context = std::shared_ptr(unique_task_context.release()); - node_state->SetTaskContext(shared_task_context); - for (int i=0; i<5; ++i) { uint64_t value_0 = 512; TensorValue in_tensor0(&value_0, sizeof(value_0)); From ab65075326c2758b5054abb766ca3275b0e26e94 Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 12 Jun 2021 17:52:12 +0800 Subject: [PATCH 10/51] Add Init to NodeState --- ge/hybrid/executor/node_state.cc | 9 +++++++++ ge/hybrid/executor/node_state.h | 10 ++-------- ge/hybrid/executor/subgraph_context.cc | 25 ++++++++++++++++++------- ge/hybrid/executor/subgraph_context.h | 1 + 4 files changed, 30 insertions(+), 15 deletions(-) diff --git a/ge/hybrid/executor/node_state.cc b/ge/hybrid/executor/node_state.cc index 42e08811..468c84e6 100644 --- a/ge/hybrid/executor/node_state.cc +++ b/ge/hybrid/executor/node_state.cc @@ -259,8 +259,16 @@ ShapeFuture::ShapeFuture(NodeState *src_node, NodeState::NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context) : node_item_(&node_item), shape_inference_state_(node_item), subgraph_context_(subgraph_context) { this->op_desc_ = node_item.node->GetOpDesc(); +} + +Status NodeState::Init(int group, const shared_ptr &frame_state) { + GE_CHECK_NOTNULL(frame_state); + group_ = group; + frame_state_ = frame_state; auto unique_task_context = TaskContext::Create(this, subgraph_context_); + GE_CHECK_NOTNULL(unique_task_context); task_context_ = std::shared_ptr(unique_task_context.release()); + return SUCCESS; } Status 
NodeState::AwaitInputTensors(GraphExecutionContext &context) const { @@ -350,6 +358,7 @@ void NodeState::ResetContext(uint64_t iteration) { switch_index_ = -1; subgraph_context_->ResetContext(node_item_->node); auto unique_task_context = TaskContext::Create(this, subgraph_context_); + GE_CHECK_NOTNULL_JUST_RETURN(unique_task_context); task_context_ = std::shared_ptr(unique_task_context.release()); data_scheduled_ = static_cast(node_item_->root_data_.size()); diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 72e2b90e..85f9e4c3 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -100,6 +100,8 @@ struct NodeState { NodeState(const NodeItem &node_item, SubgraphContext *subgraph_context); ~NodeState() = default; + Status Init(int group, const shared_ptr &frame_state); + OpDesc *GetOpDesc() const { return op_desc_.get(); } @@ -152,18 +154,10 @@ struct NodeState { return merge_index_; } - void SetGroup(int group) { - group_ = group; - } - int GetGroup() const { return group_; } - void SetFrameState(const shared_ptr &frame_state) { - frame_state_ = frame_state; - } - const shared_ptr &GetKernelTask() const { return kernel_task_; } diff --git a/ge/hybrid/executor/subgraph_context.cc b/ge/hybrid/executor/subgraph_context.cc index 41ada9af..5e97a9a2 100644 --- a/ge/hybrid/executor/subgraph_context.cc +++ b/ge/hybrid/executor/subgraph_context.cc @@ -79,20 +79,31 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) { return nullptr; } + return CreateNodeState(node_item); +} + +NodeStatePtr SubgraphContext::CreateNodeState(const NodeItem *node_item) { GELOGD("[%s] lock for write", node_item->NodeName().c_str()); if (mmRWLockWRLock(&rw_lock_) != EN_OK) { REPORT_CALL_ERROR("E19999", "[Node:%s] Lock for write failed", node_item->NodeName().c_str()); GELOGE(INTERNAL_ERROR, "[RWLock][Lock][Node:%s] Lock for write failed", node_item->NodeName().c_str()); return nullptr; } + auto &node_state = node_states_[node_item]; - if (node_state == nullptr) { - const auto &guard = node_item->MutexGuard("GetOrCreateNodeState"); - node_state.reset(new(std::nothrow)NodeState(*node_item, this)); - node_state->SetFrameState(GetOrCreateFrameState(*node_item)); - node_state->SetGroup(group_); - (void)guard; - } + do { + if (node_state == nullptr) { + const auto &guard = node_item->MutexGuard("GetOrCreateNodeState"); + node_state.reset(new(std::nothrow)NodeState(*node_item, this)); + if (node_state == nullptr || node_state->Init(group_, GetOrCreateFrameState(*node_item)) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Create][NodeState] failed for[%s].", node_item->NodeName().c_str()); + REPORT_CALL_ERROR("E19999", "Create NodeState failed for %s.", node_item->NodeName().c_str()); + break; + } + (void)guard; + } + } while (0); + GELOGD("[%s] unlock for write", node_item->NodeName().c_str()); if (mmWRLockUnLock(&rw_lock_) != EN_OK) { REPORT_CALL_ERROR("E19999", "[Node:%s] Unlock for write failed", node_item->NodeName().c_str()); diff --git a/ge/hybrid/executor/subgraph_context.h b/ge/hybrid/executor/subgraph_context.h index d11d00d7..023be981 100644 --- a/ge/hybrid/executor/subgraph_context.h +++ b/ge/hybrid/executor/subgraph_context.h @@ -51,6 +51,7 @@ class SubgraphContext { void NodeDone(const NodePtr &node); private: + NodeStatePtr CreateNodeState(const NodeItem *node_item); FrameStatePtr GetOrCreateFrameState(const NodeItem &node_item); // no lock friend class TaskContext; const GraphItem *graph_item_; From 
f578e8fff4f958e1ec52b8e0c73b6dbc95e7c77d Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Sat, 12 Jun 2021 18:36:32 +0800 Subject: [PATCH 11/51] Fix NodeState for UT --- .../executor/worker/execution_engine_unittest.cc | 23 +++++++++++----------- tests/ut/ge/hybrid/ge_hybrid_unittest.cc | 20 +++++++++++-------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc index e0ccbfa5..cc20d614 100644 --- a/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc +++ b/tests/ut/ge/hybrid/executor/worker/execution_engine_unittest.cc @@ -83,15 +83,14 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_kernel_task) { execution_context.profiling_level = 1; SubgraphContext subgraph_context(nullptr, &execution_context); - NodeState node_state(*node_item, &subgraph_context); - - ExecutionEngine execution_engine; - ASSERT_TRUE(node_state.GetTaskContext() != nullptr); + auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); + ASSERT_TRUE(node_state->GetTaskContext() != nullptr); std::function callback; SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &execution_context); - executor.InitCallback(&node_state, callback); - EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context, callback), INTERNAL_ERROR); + executor.InitCallback(node_state.get(), callback); + ExecutionEngine execution_engine; + EXPECT_EQ(execution_engine.ExecuteAsync(*node_state, node_state->GetTaskContext(), execution_context, callback), INTERNAL_ERROR); } TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { @@ -115,18 +114,18 @@ TEST_F(UtestExecutionEngine, ExecuteAsync_without_callback_and_kernel_task) { execution_context.model = &hybrid_model; SubgraphContext subgraph_context(nullptr, &execution_context); - NodeState node_state(*node_item, &subgraph_context); + auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); uint32_t task_id = 0; uint32_t stream_id = 1; std::string task_type = "rts"; uint32_t block_dim = 0; - node_state.GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); + node_state->GetTaskContext()->SaveProfilingTaskDescInfo(task_id, stream_id, task_type, block_dim); - ExecutionEngine execution_engine; - ASSERT_TRUE(node_state.GetTaskContext() != nullptr); + ASSERT_TRUE(node_state->GetTaskContext() != nullptr); std::function callback; SubgraphExecutor executor(hybrid_model.GetRootGraphItem(), &execution_context); - executor.InitCallback(&node_state, callback); - EXPECT_EQ(execution_engine.ExecuteAsync(node_state, node_state.GetTaskContext(), execution_context, callback), INTERNAL_ERROR); + executor.InitCallback(node_state.get(), callback); + ExecutionEngine execution_engine; + EXPECT_EQ(execution_engine.ExecuteAsync(*node_state, node_state->GetTaskContext(), execution_context, callback), INTERNAL_ERROR); } diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc index d634ed14..228af832 100644 --- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc +++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc @@ -160,9 +160,9 @@ TEST_F(UtestGeHybrid, task_update_tiling_info) { GraphExecutionContext execution_context; SubgraphContext subgraph_context(nullptr, &execution_context); - NodeState node_state(*node_item, &subgraph_context); + auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); 
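 // Note: since PATCH 10/51 wired NodeState::Init into
 // SubgraphContext::GetOrCreateNodeState, the node state fetched above already
 // owns a TaskContext, which is why the explicit TaskContext::Create calls are
 // dropped from these tests.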
ASSERT_EQ(aicore_task->InitTilingInfo(*op_desc), SUCCESS); - ASSERT_EQ(aicore_task->UpdateTilingInfo(*node_state.GetTaskContext()), SUCCESS); + ASSERT_EQ(aicore_task->UpdateTilingInfo(*node_state->GetTaskContext()), SUCCESS); } TEST_F(UtestGeHybrid, index_taskdefs_failed) { @@ -475,12 +475,14 @@ TEST_F(UtestGeHybrid, TestTaskContext) { node_item->output_start = 0; GraphExecutionContext execution_context; - SubgraphContext subgraph_context(nullptr, &execution_context); + GraphItem graph_item; + SubgraphContext subgraph_context(&graph_item, &execution_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); subgraph_context.all_inputs_.resize(2); subgraph_context.all_outputs_.resize(1); - NodeState node_state(*node_item, &subgraph_context); - auto task_context = node_state.GetTaskContext(); + auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); + auto task_context = node_state->GetTaskContext(); ASSERT_TRUE(task_context != nullptr); auto desc = task_context->MutableInputDesc(2); ASSERT_TRUE(desc == nullptr); @@ -520,12 +522,14 @@ TEST_F(UtestGeHybrid, hybrid_model_executor_update_args) { node_item->output_start = 0; GraphExecutionContext execution_context; - SubgraphContext subgraph_context(nullptr, &execution_context); + GraphItem graph_item; + SubgraphContext subgraph_context(&graph_item, &execution_context); + ASSERT_EQ(subgraph_context.Init(), SUCCESS); subgraph_context.all_inputs_.resize(2); subgraph_context.all_outputs_.resize(1); - NodeState node_state(*node_item, &subgraph_context); - auto task_context = node_state.GetTaskContext(); + auto node_state = subgraph_context.GetOrCreateNodeState(node_item.get()); + auto task_context = node_state->GetTaskContext(); int32_t buffer[1]; aicore_task->tiling_buffer_ = TensorBuffer::Create(buffer, sizeof(buffer)); From 367774c5b009edc3d8838163629a37925692e611 Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Tue, 15 Jun 2021 14:44:44 +0800 Subject: [PATCH 12/51] enable optimization --- ge/graph/optimize/graph_optimize.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/ge/graph/optimize/graph_optimize.cc b/ge/graph/optimize/graph_optimize.cc index 835e257b..55f374eb 100644 --- a/ge/graph/optimize/graph_optimize.cc +++ b/ge/graph/optimize/graph_optimize.cc @@ -336,10 +336,8 @@ Status GraphOptimize::OptimizeAfterStage1(ComputeGraphPtr &compute_graph) { GELOGI("[OptimizeAfterStage1]: engine type will exclude:%s.", exclude_core_type.c_str()); continue; } -#ifndef ONLY_COMPILE_OPEN_SRC GELOGI("Begin to optimize graph after stage1 by engine %s.", iter->first.c_str()); ret = (iter->second)->OptimizeAfterStage1(*compute_graph); -#endif if (ret != SUCCESS) { REPORT_INNER_ERROR("E19999", "Call OptimizeAfterStage1 failed, ret:%d, engine_name:%s, " "graph_name:%s.", ret, iter->first.c_str(), compute_graph->GetName().c_str()); From 181cd5891bd97b4aca9f28330e1f0a20def75e69 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 16:46:28 +0800 Subject: [PATCH 13/51] Release context in execute end. 
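Drop the per-execution subgraph context once the outputs have been fetched, presumably so the next iteration starts from a fresh SubgraphContext. For reference, a minimal standalone sketch of the std::unique_ptr semantics this relies on (plain C++, not GE code; note that release() only gives up ownership, while reset() actually destroys the held object):

    #include <memory>

    struct SubgraphContext {};  // stand-in for the real per-execution state

    int main() {
      std::unique_ptr<SubgraphContext> ctx(new SubgraphContext());
      SubgraphContext *raw = ctx.release();  // ctx is now empty; the object is NOT freed
      delete raw;                            // after release(), freeing is the caller's job
      ctx.reset(new SubgraphContext());      // reset(p) deletes any held object, then owns p
      ctx.reset();                           // reset() deletes the held object, leaves ctx empty
      return 0;
    }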
--- ge/hybrid/executor/hybrid_model_executor.cc | 1 + ge/hybrid/executor/subgraph_executor.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index b3c2c471..b4173407 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -125,6 +125,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(HybridModelExecutor::ExecuteArg args.outputs.clear(); HYBRID_CHK_STATUS_RET(root_graph_executor_->GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); + root_graph_executor_->ResetContext(); return SUCCESS; } diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 758bf426..0f54e4ca 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -41,6 +41,8 @@ class SubgraphExecutor { Status PartialExecuteAsync(int task_group); + void ResetContext() { subgraph_context_.release(); } + /** * Execute subgraph async, output tensor address(not data) and output tensor descriptions are * valid after this method returned From ab7334ed780343a80c885a1b064d1a42fa51faf0 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 16:50:40 +0800 Subject: [PATCH 14/51] Release context in execute end. --- ge/hybrid/executor/hybrid_model_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index b4173407..2abd9cd6 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -70,6 +70,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { context_.profiler->Dump(std::cout); context_.profiler->Reset(); } + root_graph_executor_->ResetContext(); context_.iteration += 1; if (ret == END_OF_SEQUENCE) { @@ -125,7 +126,6 @@ Status HybridModelExecutor::ExecuteGraphInternal(HybridModelExecutor::ExecuteArg args.outputs.clear(); HYBRID_CHK_STATUS_RET(root_graph_executor_->GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); - root_graph_executor_->ResetContext(); return SUCCESS; } From 492d36b237ec601da5644054ab3eed4c4fbfd6d7 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 20:01:06 +0800 Subject: [PATCH 15/51] Fix ut. --- tests/ut/ge/single_op/single_op_model_unittest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index 1cb2b22c..fb772c33 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -223,7 +223,7 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { model.model_helper_.model_->SetGraph(graph); auto op_desc = transdata->GetOpDesc(); - op_desc->input_name_idx_["Data"] = 0; + op_desc->impl_->input_name_idx_["Data"] = 0; const vector depend_names = { "Data" }; op_desc->SetOpInferDepends(depend_names); (void)AttrUtils::SetBool(op_desc, kAttrSupportDynamicShape, true); From 0c2d07eb7250e5cad532a906691f48b4dd48b552 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 21:44:09 +0800 Subject: [PATCH 16/51] Fix ut. 
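Route the user-provided input shapes straight onto the Data node's output desc instead of feeding them through the shape-inference state, and skip InferShapeAndType for input nodes. A condensed sketch of the new input path, using the names from the diff below (kDataInputIndex is assumed to be 0 here; the follow-up "Fix ut." commit adds the null check and pointer access):

    // For each user input backed by a Data node:
    auto op_desc = input_node->GetOpDesc();            // Data node of this input
    auto output_desc = op_desc->MutableOutputDesc(0);  // kDataInputIndex
    output_desc->SetShape(tensor_desc->GetShape());    // runtime shape from the user tensor
    output_desc->SetOriginShape(tensor_desc->GetOriginShape());
    output_desc->SetDataType(tensor_desc->GetDataType());

With the shapes already present on the Data nodes, ShapeInferenceEngine::InferShape can bypass ShapeRefiner for DATA_TYPE and AIPP_DATA_TYPE nodes.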
--- ge/hybrid/executor/subgraph_executor.cc | 9 ++++++--- ge/hybrid/executor/worker/shape_inference_engine.cc | 2 +- tests/ut/ge/single_op/single_op_model_unittest.cc | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 612e7565..4f0566b4 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -100,9 +100,12 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOrCreateNodeState(input_node); - GE_CHECK_NOTNULL(node_state); - node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); + auto op_desc = input_node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + auto output_desc = op_desc->MutableOutputDesc(kDataInputIndex); + output_desc.SetShape(tensor_desc->GetShape()); + output_desc.SetOriginShape(tensor_desc->GetOriginShape()); + output_desc.SetDataType(tensor_desc->GetDataType()); } } diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index a2efbb25..4dc5b79c 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -69,7 +69,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); - { + if (node_state.GetType() != DATA_TYPE && node_state.GetType() != AIPP_DATA_TYPE) { RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str()); diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index fb772c33..cb0b497d 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -30,6 +30,7 @@ #include "single_op/single_op.h" #include "single_op/stream_resource.h" #include "graph/passes/graph_builder_utils.h" +#include "graph/op_desc_impl.h" #undef private #undef protected From 7ce31b2e0ec853582645d45336874c1262424b44 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 22:06:11 +0800 Subject: [PATCH 17/51] Fix ut. --- ge/hybrid/executor/subgraph_executor.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 4f0566b4..b474c5dd 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -103,9 +103,10 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOpDesc(); GE_CHECK_NOTNULL(op_desc); auto output_desc = op_desc->MutableOutputDesc(kDataInputIndex); - output_desc.SetShape(tensor_desc->GetShape()); - output_desc.SetOriginShape(tensor_desc->GetOriginShape()); - output_desc.SetDataType(tensor_desc->GetDataType()); + GE_CHECK_NOTNULL(output_desc); + output_desc->SetShape(tensor_desc->GetShape()); + output_desc->SetOriginShape(tensor_desc->GetOriginShape()); + output_desc->SetDataType(tensor_desc->GetDataType()); } } From 24eedfa3b4df7eb41fbb13f36759f7537500209a Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Tue, 15 Jun 2021 22:09:17 +0800 Subject: [PATCH 18/51] Fix ut. 
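Add a comment explaining why shape inference is skipped for input nodes. For reference, the guard this comment documents (introduced two commits earlier in this series, shape_inference_engine.cc, abridged):

    // Data/AippData nodes already received their output shapes from the user
    // tensors in InitInputsForUnknownShape, so re-running inference on them
    // would be redundant.
    if (node_state.GetType() != DATA_TYPE && node_state.GetType() != AIPP_DATA_TYPE) {
      GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
                        "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str());
    }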
--- ge/hybrid/executor/worker/shape_inference_engine.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 4dc5b79c..18fed710 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -68,6 +68,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { } // Do shape inference + // Skip shape inference for input nodes; their shapes are supplied by user input. GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); if (node_state.GetType() != DATA_TYPE && node_state.GetType() != AIPP_DATA_TYPE) { RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); From b64048a39f53e30fcc69491bf0c93c805bdec3b8 Mon Sep 17 00:00:00 2001 From: xchu42 Date: Sat, 12 Jun 2021 14:01:24 +0800 Subject: [PATCH 19/51] Init NodeExecutor on demand --- ge/hybrid/node_executor/node_executor.cc | 75 +++++++-------- ge/hybrid/node_executor/node_executor.h | 7 +- tests/ut/ge/CMakeLists.txt | 2 + .../hybrid/node_executor/node_executor_unittest.cc | 103 +++++++++++++++++++++ 4 files changed, 143 insertions(+), 44 deletions(-) create mode 100644 tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 5f3d6e45..04225557 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -58,8 +58,8 @@ Status NodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, } Status NodeExecutorManager::EnsureInitialized() { - GE_CHK_STATUS_RET(InitializeExecutors()); std::lock_guard<std::mutex> lk(mu_); + ++ref_count_; if (initialized_) { return SUCCESS; } @@ -115,17 +115,14 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return it->second; } -Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) const { +Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) { auto executor_type = ResolveExecutorType(node); + GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast<int>(executor_type)); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - REPORT_INNER_ERROR("E19999", "Failed to get executor by type: %d.", static_cast<int>(executor_type)); - GELOGE(INTERNAL_ERROR, "[Check][ExecutorType]Failed to get executor by type: %d.", - static_cast<int>(executor_type)); - return INTERNAL_ERROR; + return GetOrCreateExecutor(executor_type, executor); } - GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast<int>(executor_type)); *executor = it->second.get(); return SUCCESS; } @@ -178,51 +175,50 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { return OpsKernelBuilderManager::Instance().CalcOpRunningParam(node); } -Status NodeExecutorManager::InitializeExecutors() { +Status NodeExecutorManager::GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **out_executor) { std::lock_guard<std::mutex> lk(mu_); - if (executor_initialized_) { - ++ref_count_; - GELOGI("Executor is already initialized. 
add ref count to [%d]", ref_count_); + const auto executor_it = executors_.find(executor_type); + if (executor_it != executors_.end()) { + *out_executor = executor_it->second.get(); return SUCCESS; } - GELOGI("Start to Initialize NodeExecutors"); - for (auto &it : builders_) { - auto engine_type = it.first; - auto build_fn = it.second; - GE_CHECK_NOTNULL(build_fn); - auto executor = std::unique_ptr(build_fn()); - if (executor == nullptr) { - REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for engine type = %d", - static_cast(engine_type)); - GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast(engine_type)); - return INTERNAL_ERROR; - } + GELOGI("Start to Initialize NodeExecutor, type = %d", static_cast(executor_type)); + auto it = builders_.find(executor_type); + if (it == builders_.end()) { + REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d", + static_cast(executor_type)); + GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for executor type = %d", static_cast(executor_type)); + return INTERNAL_ERROR; + } - GELOGD("Executor of engine type = %d was created successfully", static_cast(engine_type)); - auto ret = executor->Initialize(); - if (ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast(engine_type)); - GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast(engine_type)); - for (auto &executor_it : executors_) { - executor_it.second->Finalize(); - } - executors_.clear(); - return ret; - } + auto build_fn = it->second; + GE_CHECK_NOTNULL(build_fn); + auto executor = std::unique_ptr(build_fn()); + if (executor == nullptr) { + REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d", + static_cast(executor_type)); + GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast(executor_type)); + return INTERNAL_ERROR; + } - executors_.emplace(engine_type, std::move(executor)); + GELOGD("Executor of engine type = %d was created successfully", static_cast(executor_type)); + auto ret = executor->Initialize(); + if (ret != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast(executor_type)); + GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast(executor_type)); + return ret; } - ++ref_count_; - executor_initialized_ = true; - GELOGI("Initializing NodeExecutors successfully."); + *out_executor = executor.get(); + executors_.emplace(executor_type, std::move(executor)); + GELOGI("Initializing NodeExecutor successfully, type = %d", static_cast(executor_type)); return SUCCESS; } void NodeExecutorManager::FinalizeExecutors() { std::lock_guard lk(mu_); - if (!executor_initialized_) { + if (ref_count_ <= 0) { GELOGD("No need for finalizing for not initialized."); return; } @@ -237,7 +233,6 @@ void NodeExecutorManager::FinalizeExecutors() { it.second->Finalize(); } executors_.clear(); - executor_initialized_ = false; GELOGD("Done invoking Finalize successfully."); } diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index fffd4e7d..97c9cee9 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -179,8 +179,6 @@ class NodeExecutorManager { */ Status EnsureInitialized(); - Status InitializeExecutors(); - void FinalizeExecutors(); /** @@ -196,7 +194,7 @@ class NodeExecutorManager { * @param executor executor * @return SUCCESS on success, error 
code otherwise */ - Status GetExecutor(Node &node, const NodeExecutor **executor) const; + Status GetExecutor(Node &node, const NodeExecutor **executor); /** * Resolve executor type by node @@ -206,12 +204,13 @@ class NodeExecutorManager { ExecutorType ResolveExecutorType(Node &node) const; private: + Status GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **executor); + std::map> executors_; std::map> builders_; std::map engine_mapping_; std::mutex mu_; bool initialized_ = false; - bool executor_initialized_ = false; int ref_count_ = 0; }; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 63579109..cd3d541c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -836,6 +836,7 @@ set(HYBRID_TEST_FILES "hybrid/executor/subgraph_executor_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc" + "hybrid/node_executor/node_executor_unittest.cc" "hybrid/node_executor/rts/rts_node_task_unittest.cc" "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" @@ -843,6 +844,7 @@ set(HYBRID_TEST_FILES "hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" + ) set(OTHERS_TEST_FILES diff --git a/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc new file mode 100644 index 00000000..8a1240d3 --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc @@ -0,0 +1,103 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#define private public +#define protected public +#include "hybrid/node_executor/node_executor.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; + +namespace ge { +using namespace hybrid; + +namespace { + bool finalized = false; +} + +class NodeExecutorTest : public testing::Test { + protected: + void SetUp() {} + void TearDown() { } +}; + +class FailureNodeExecutor : public NodeExecutor { + public: + Status Initialize() override { + return INTERNAL_ERROR; + } +}; + +class SuccessNodeExecutor : public NodeExecutor { + public: + Status Initialize() override { + initialized = true; + finalized = false; + return SUCCESS; + } + + Status Finalize() override { + finalized = true; + } + + bool initialized = false; +}; + +REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, FailureNodeExecutor); +REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, SuccessNodeExecutor); + +TEST_F(NodeExecutorTest, TestGetOrCreateExecutor) { + auto &manager = NodeExecutorManager::GetInstance(); + const NodeExecutor *executor = nullptr; + Status ret = SUCCESS; + // no builder + ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::RESERVED, &executor); + ASSERT_EQ(ret, INTERNAL_ERROR); + // initialize failure + ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICORE, &executor); + ASSERT_EQ(ret, INTERNAL_ERROR); + ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); + ASSERT_EQ(ret, SUCCESS); + ASSERT_TRUE(executor != nullptr); + ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); + ASSERT_EQ(ret, SUCCESS); + ASSERT_TRUE(executor != nullptr); + ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized); +} + +TEST_F(NodeExecutorTest, TestInitAndFinalize) { + auto &manager = NodeExecutorManager::GetInstance(); + manager.FinalizeExecutors(); + manager.EnsureInitialized(); + manager.EnsureInitialized(); + const NodeExecutor *executor = nullptr; + auto ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); + ASSERT_EQ(ret, SUCCESS); + ASSERT_TRUE(executor != nullptr); + ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized); + manager.FinalizeExecutors(); + ASSERT_FALSE(manager.executors_.empty()); + manager.FinalizeExecutors(); + ASSERT_TRUE(manager.executors_.empty()); + ASSERT_TRUE(finalized); +} +} // namespace ge From d1eba02e1e972da774c2ddd474fef242f31b14d5 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 09:28:41 +0800 Subject: [PATCH 20/51] Fix ut. --- tests/ut/ge/single_op/single_op_model_unittest.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index fb772c33..cb0b497d 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -30,6 +30,7 @@ #include "single_op/single_op.h" #include "single_op/stream_resource.h" #include "graph/passes/graph_builder_utils.h" +#include "graph/op_desc_impl.h" #undef private #undef protected From 69da59b6b790cd76dca11413b5e342aab6a56caa Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 09:40:26 +0800 Subject: [PATCH 21/51] Fix ut. 
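This restores the GetOrCreateNodeState/UpdateInputShape path next to the direct output-desc write: the desc update records the shape on the graph, while the node-state update signals consumers that wait for an input's shape before running. A toy model of that signalling; std::promise stands in for GE's ShapeInferenceState, which works differently in detail:

    #include <cstdint>
    #include <future>
    #include <vector>

    // Toy stand-in: a consumer blocks on the future until the shape is published.
    struct ShapeInferenceState {
      std::promise<std::vector<int64_t>> shape_ready;
      void UpdateInputShape(std::vector<int64_t> dims) {
        shape_ready.set_value(std::move(dims));  // wakes any waiting consumer
      }
    };

    int main() {
      ShapeInferenceState state;
      auto fut = state.shape_ready.get_future();
      state.UpdateInputShape({8, 3, 224, 224});  // without this, fut.get() never returns
      return fut.get().size() == 4 ? 0 : 1;
    }

Updating only the tensor desc would leave such waiters blocked, which is why both updates are kept.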
--- ge/hybrid/executor/subgraph_executor.cc | 5 ++++- tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc | 4 ++-- tests/ut/ge/single_op/single_op_model_unittest.cc | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index 7081c8f4..c26eac9b 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -100,13 +100,16 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorGetOrCreateNodeState(input_node); + GE_CHECK_NOTNULL(node_state); + node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); auto op_desc = input_node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); auto output_desc = op_desc->MutableOutputDesc(kDataInputIndex); GE_CHECK_NOTNULL(output_desc); output_desc->SetShape(tensor_desc->GetShape()); output_desc->SetOriginShape(tensor_desc->GetOriginShape()); - output_desc->SetDataType(tensor_desc->GetDataType()); + output_desc->SetDataType(tensor_desc->GetDataType()); } } diff --git a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc index 52537ee2..98bb78f2 100644 --- a/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc +++ b/tests/ut/ge/hybrid/executor/hybrid_model_async_executor_unittest.cc @@ -87,7 +87,7 @@ TEST_F(UtestHybridModelAsyncExecutor, BuildDeviceTensor) { ASSERT_EQ(size, 100); } -TEST_F(UtestHybridModelAsyncExecutor, Test_execute_internal) { +TEST_F(UtestHybridModelAsyncExecutor, Test_execute) { ComputeGraphPtr graph = std::make_shared("test"); GeRootModelPtr ge_root_model = make_shared(graph); ge_root_model->SetModelName("test_name"); @@ -101,6 +101,6 @@ TEST_F(UtestHybridModelAsyncExecutor, Test_execute_internal) { std::pair> eof_entry; eof_entry.first = nullptr; context.callback_manager->callback_queue_.Push(eof_entry); - ASSERT_EQ(executor.ExecuteGraphInternal(args), SUCCESS); + ASSERT_EQ(executor.Execute(args), SUCCESS); } } // namespace ge \ No newline at end of file diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index cb0b497d..63a3eafe 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -224,7 +224,6 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { model.model_helper_.model_->SetGraph(graph); auto op_desc = transdata->GetOpDesc(); - op_desc->impl_->input_name_idx_["Data"] = 0; const vector depend_names = { "Data" }; op_desc->SetOpInferDepends(depend_names); (void)AttrUtils::SetBool(op_desc, kAttrSupportDynamicShape, true); @@ -247,6 +246,9 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { DynamicSingleOp dynamic_single_op(0, &stream_mu_, nullptr); StreamResource res((uintptr_t)1); model.BuildDynamicOp(res, dynamic_single_op); + + op_desc->impl_->input_name_idx_["Data"] = 0; + model.BuildDynamicOp(res, dynamic_single_op); } TEST_F(UtestSingleOpModel, test_host_mem) { From 58086ab1872f2fc7374dc7b969a3fdcb206b7841 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 11:14:05 +0800 Subject: [PATCH 22/51] Release mem. 
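unique_ptr::release() only detaches the managed pointer and returns it; because ResetContext() discarded the return value, the SubgraphContext was never destroyed and its memory leaked. reset(nullptr) destroys the object immediately. A self-contained illustration (SubgraphContext below is a stand-in for ge::hybrid::SubgraphContext):

    #include <cstdio>
    #include <memory>

    struct SubgraphContext {
      ~SubgraphContext() { std::puts("context destroyed"); }
    };

    int main() {
      std::unique_ptr<SubgraphContext> ctx(new SubgraphContext());
      // ctx.release();    // detaches and returns the raw pointer; discarding
      //                   // the result means the destructor never runs: a leak
      ctx.reset(nullptr);  // destroys the context right here, as intended
      return 0;
    }
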
--- ge/hybrid/executor/subgraph_executor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 7e1c2d0b..35f6e67e 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -41,7 +41,7 @@ class SubgraphExecutor { Status PartialExecuteAsync(int task_group); - void ResetContext() { subgraph_context_.release(); } + void ResetContext() { subgraph_context_.reset(nullptr); } /** * Execute subgraph async, output tensor address(not data) and output tensor descriptions are From 116167dc88160c6f6c10703f0ad3c8bd570b48eb Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Tue, 15 Jun 2021 11:49:11 +0800 Subject: [PATCH 23/51] ge code for 1981 --- ge/CMakeLists.txt | 2 + ge/executor/CMakeLists.txt | 1 + ge/graph/build/label_allocator.cc | 5 + ge/graph/build/logical_stream_allocator.cc | 5 + ge/graph/build/stream_allocator.cc | 11 +- ge/graph/build/task_generator.cc | 32 +- ge/graph/build/task_generator.h | 1 + ge/graph/load/model_manager/davinci_model.cc | 207 ++++++++--- ge/graph/load/model_manager/davinci_model.h | 6 + .../load/model_manager/task_info/ffts_task_info.cc | 393 +++++++++++++++++++++ .../load/model_manager/task_info/ffts_task_info.h | 66 ++++ ge/graph/partition/graph_partition.cc | 21 +- metadef | 2 +- parser | 2 +- tests/depends/runtime/src/runtime_stub.cc | 4 + tests/ut/ge/CMakeLists.txt | 2 + tests/ut/ge/graph/load/davinci_model_unittest.cc | 140 ++++++++ tests/ut/ge/graph/load/ffts_task_info_unittest.cc | 212 +++++++++++ third_party/fwkacllib/inc/runtime/rt.h | 1 + third_party/fwkacllib/inc/runtime/rt_ffts.h | 185 ++++++++++ third_party/fwkacllib/inc/runtime/rt_model.h | 1 + 21 files changed, 1235 insertions(+), 64 deletions(-) create mode 100644 ge/graph/load/model_manager/task_info/ffts_task_info.cc create mode 100644 ge/graph/load/model_manager/task_info/ffts_task_info.h create mode 100644 tests/ut/ge/graph/load/ffts_task_info_unittest.cc create mode 100755 third_party/fwkacllib/inc/runtime/rt_ffts.h diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 215d2832..81e2d539 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -174,6 +174,7 @@ set(TRAIN_SRC_LIST "graph/load/model_manager/task_info/model_exit_task_info.cc" "graph/load/model_manager/task_info/event_record_task_info.cc" "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/ffts_task_info.cc" "graph/load/model_manager/task_info/fusion_start_task_info.cc" "graph/load/model_manager/task_info/fusion_stop_task_info.cc" "graph/load/model_manager/task_info/hccl_task_info.cc" @@ -662,6 +663,7 @@ set(INFER_SRC_LIST "graph/load/model_manager/task_info/task_info.cc" "graph/load/model_manager/task_info/event_record_task_info.cc" "graph/load/model_manager/task_info/event_wait_task_info.cc" + "graph/load/model_manager/task_info/ffts_task_info.cc" "graph/load/model_manager/task_info/fusion_start_task_info.cc" "graph/load/model_manager/task_info/fusion_stop_task_info.cc" "graph/load/model_manager/task_info/kernel_ex_task_info.cc" diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt index f1267c1e..b04216b8 100644 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -37,6 +37,7 @@ set(SRC_LIST "../graph/load/model_manager/task_info/task_info.cc" "../graph/load/model_manager/task_info/event_record_task_info.cc" "../graph/load/model_manager/task_info/event_wait_task_info.cc" + 
"../graph/load/model_manager/task_info/ffts_task_info.cc" "../graph/load/model_manager/task_info/fusion_start_task_info.cc" "../graph/load/model_manager/task_info/fusion_stop_task_info.cc" "../graph/load/model_manager/task_info/kernel_ex_task_info.cc" diff --git a/ge/graph/build/label_allocator.cc b/ge/graph/build/label_allocator.cc index 32bdd0a3..dd7ee828 100644 --- a/ge/graph/build/label_allocator.cc +++ b/ge/graph/build/label_allocator.cc @@ -86,6 +86,11 @@ bool LabelAllocator::CollectFunctionalNode(ComputeGraphPtr &graph, std::setGetOpDesc() != nullptr && func_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { + GELOGD("Graph[%s] is ffts subgraph, skip label allocator.", graph->GetName().c_str()); + return true; + } + ComputeGraphPtr owner_graph = func_node->GetOwnerComputeGraph(); if (owner_graph == nullptr) { REPORT_INNER_ERROR("E19999", "ComputeGraph owner not set in node:%s(%s), graph:%s", diff --git a/ge/graph/build/logical_stream_allocator.cc b/ge/graph/build/logical_stream_allocator.cc index c74cdf7a..58763aa9 100644 --- a/ge/graph/build/logical_stream_allocator.cc +++ b/ge/graph/build/logical_stream_allocator.cc @@ -474,6 +474,11 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vectorGetDirectNode()) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); + if (op_desc->HasAttr(ATTR_NAME_THREAD_SCOPE_ID)) { + op_desc->SetStreamId(kInvalidStream); + GELOGI("Ffts node %s of type %s reassign to invalid stream.", node->GetName().c_str(), node->GetType().c_str()); + continue; + } int64_t stream_id = op_desc->GetStreamId(); if (ops_without_label.find(op_desc) != ops_without_label.end()) { if (AreAllPredStreamsInvalid(node) && op_desc->GetSubgraphInstanceNames().empty()) { diff --git a/ge/graph/build/stream_allocator.cc b/ge/graph/build/stream_allocator.cc index dae36b83..d896925c 100644 --- a/ge/graph/build/stream_allocator.cc +++ b/ge/graph/build/stream_allocator.cc @@ -432,7 +432,11 @@ Status StreamAllocator::SetActiveStreamsForSubgraphs() { // Insert the send/recv event id to the graph Status StreamAllocator::InsertSyncEvents() { - for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { + auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { + return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); + }; + + for (const auto &cur_node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag(), nullptr, ffts_filter)) { // Take the adjacent points, then judge whether need to insert the event for (const OutDataAnchorPtr &anchor : cur_node->GetAllOutDataAnchors()) { for (const InDataAnchorPtr &peer_in_anchor : anchor->GetPeerInDataAnchors()) { @@ -531,6 +535,11 @@ Status StreamAllocator::InsertOneEventInTwoNodes(const NodePtr &cur_node, const Status StreamAllocator::InsertEventsForSubgraph() { for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { GE_CHECK_NOTNULL(subgraph); + const auto parent_node = subgraph->GetParentNode(); + if (parent_node != nullptr && parent_node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { + GELOGD("Skip ffts subgraph, parent node is %s.", parent_node->GetName().c_str()); + continue; + } for (const auto &node : subgraph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); diff --git a/ge/graph/build/task_generator.cc b/ge/graph/build/task_generator.cc index 12da803d..f9456aab 100755 --- a/ge/graph/build/task_generator.cc +++ b/ge/graph/build/task_generator.cc @@ -354,7 +354,10 @@ Status 
TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra }; GE_MAKE_GUARD(release, callback); - for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag())) { + auto ffts_filter = [](const Node &node, const char *, const ComputeGraphPtr &) { + return !node.GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH); + }; + for (auto &node : graph->GetNodes(graph->GetGraphUnknownFlag(), nullptr, ffts_filter)) { OpDescPtr op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); node_index++; @@ -380,10 +383,8 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra GELOGI("Fusion node[name:%s, type:%s] do not need generate task again.", name.c_str(), type.c_str()); continue; } - if (op_kernel_lib_name.empty()) { - GELOGI("Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); - continue; - } + GE_CHK_BOOL_EXEC_INFO(!op_kernel_lib_name.empty(), continue, + "Node[name:%s, type:%s] does not need to generate task.", name.c_str(), type.c_str()); auto kernel_info_store = ops_kernel_manager.GetOpsKernelInfoStore(op_kernel_lib_name); if (kernel_info_store == nullptr) { REPORT_INNER_ERROR("E19999", "Get ops kernel info store failed for op:%s(%s), op_kernel_name:%s", @@ -394,6 +395,10 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra } GE_CHK_STATUS_RET(UpdateAnchorStatus(node), "[Call][UpdateAnchorStatus] node:%s(%s) failed", name.c_str(), type.c_str()); + if (node->GetOpDesc()->HasAttr(ATTR_NAME_FFTS_SUB_GRAPH)) { + GE_CHK_STATUS_RET(UpdateAnchorStatusForFfts(node), "[Call][UpdateAnchorStatusForFfts] node:%s(%s) failed", + name.c_str(), type.c_str()); + } // Profiling task size_t task_list_size_before = task_def_list.size(); GE_CHK_STATUS_RET(InsertProfilingTaskBefore(op_desc, profiling_point, all_reduce_nodes, node_index, task_def_list)); @@ -571,7 +576,24 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info return ret; } +Status TaskGenerator::UpdateAnchorStatusForFfts(const NodePtr &node) { + GELOGD("Start UpdateAnchorStatusForFfts for %s.", node->GetName().c_str()); + if (!node->GetOpDesc()->GetSubgraphInstanceNames().empty()) { + for (size_t i = 0; i < node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) { + auto sub_graph = NodeUtils::GetSubgraph(*node, i); + GE_CHECK_NOTNULL(sub_graph); + GELOGD("Start update anchor status for %s.", sub_graph->GetName().c_str()); + for (auto &ffts_node : sub_graph->GetDirectNode()) { + GE_CHK_STATUS_RET(UpdateAnchorStatus(ffts_node), "[Call][UpdateAnchorStatus] node:%s(%s) failed", + ffts_node->GetName().c_str(), ffts_node->GetType().c_str()); + } + } + } + return SUCCESS; +} + Status TaskGenerator::UpdateAnchorStatus(const NodePtr &node) { + GELOGD("Start UpdateAnchorStatus for %s.", node->GetName().c_str()); if (NodeUtils::SetAllAnchorStatus(node) != GRAPH_SUCCESS) { REPORT_CALL_ERROR("E19999", "SetAllAnchorStatus fail for op:%s(%s)", node->GetName().c_str(), node->GetType().c_str()); diff --git a/ge/graph/build/task_generator.h b/ge/graph/build/task_generator.h index 9f12d568..40cef3ba 100755 --- a/ge/graph/build/task_generator.h +++ b/ge/graph/build/task_generator.h @@ -80,6 +80,7 @@ class TaskGenerator { Status FindProfilingNodeIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, std::vector &all_reduce_nodes); private: + Status UpdateAnchorStatusForFfts(const NodePtr &node); Status UpdateAnchorStatus(const NodePtr &node); Status UpdateOpIsVarAttr(const OpDescPtr &op_desc, uint64_t session_id); diff --git 
a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 5b67c205..97238a4a 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -99,6 +99,9 @@ const uint32_t kEndOfSequenceNew = 507005; const int32_t kModelAbortNormal = 0x0704000e; const int32_t kModelAbortNormalNew = 507024; const uint32_t kInteval = 2; +const uint32_t kFftsTbeHandleElementSize = 2; +const uint32_t kNonTailBlock = 0; +const uint32_t kTailBlock = 1; const char *const kModelName = "model_name"; const char *const kModeleId = "model_id"; const char *const kLoadStartTime = "load_start_time"; @@ -116,14 +119,15 @@ const char *const kWorkSpaceSize = "workspace_size"; const char *const kTotalSize = "total_size"; const char *const kTaskCount = "task_count"; const char *const kTaskId = "task_id"; -const char* const kRequestId = "request_id"; -const char* const kThreadId = "thread_id"; -const char* const kInputBeginTime = "input_begin_time"; -const char* const kInputEndTime = "input_end_time"; -const char* const kInferBeginTime = "infer_begin_time"; -const char* const kInferEndTime = "infer_end_time"; -const char* const kOutputBeginTime = "output_start_time"; -const char* const kOutputEndTime = "output_end_time"; +const char *const kRequestId = "request_id"; +const char *const kThreadId = "thread_id"; +const char *const kInputBeginTime = "input_begin_time"; +const char *const kInputEndTime = "input_end_time"; +const char *const kInferBeginTime = "infer_begin_time"; +const char *const kInferEndTime = "infer_end_time"; +const char *const kOutputBeginTime = "output_start_time"; +const char *const kOutputEndTime = "output_end_time"; +const char *const kStubFuncName = "_register_stub_func"; const uint32_t kStringHeadElems = 2; const uint32_t kPlacementHostData = 0; const size_t kAlignment = 64; @@ -902,10 +906,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { SetLabelForDynamic(node); auto it = op_desc_handle.find(op_desc->GetType()); if (it != op_desc_handle.end()) { - if ((this->*it->second)(op_desc) != SUCCESS) { - GELOGE(PARAM_INVALID, "[Init][Node] failed, Name:%s", op_desc->GetName().c_str()); - return PARAM_INVALID; - } + GE_CHK_BOOL_TRUE_EXEC_WITH_LOG((this->*it->second)(op_desc) != SUCCESS, return PARAM_INVALID, + "[Init][Node] failed, Name:%s", op_desc->GetName().c_str()); continue; } @@ -935,7 +937,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { GE_TIMESTAMP_RESTART(InitTbeHandle); if (IsTbeTask(op_desc)) { - Status status = InitTbeHandle(op_desc); + Status status = + op_desc->HasAttr(ATTR_NAME_THREAD_SCOPE_ID) ? InitTbeHandleWithFfts(op_desc) : InitTbeHandle(op_desc); if (status != SUCCESS) { GELOGE(status, "[Init][TbeHandle] failed. op:%s", op_desc->GetName().c_str()); return status; @@ -3700,6 +3703,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { /// @return Status /// Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { + string bin_file = op_desc->GetName(); auto kernel = ge_model_->GetTBEKernelStore().FindKernel(op_desc->GetName()); auto tbe_kernel = (kernel != nullptr) ? 
kernel : op_desc->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr()); if (tbe_kernel == nullptr) { @@ -3708,12 +3712,61 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find tvm bin file!", op_desc->GetName().c_str()); return INTERNAL_ERROR; } + GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file, tbe_kernel, false), "Function register of bin file: %s failed", + bin_file.c_str()); + return SUCCESS; +} - std::string session_graph_model_id; - GetUniqueId(op_desc, session_graph_model_id); - const char *bin_file_key = GetRegisterStub(op_desc->GetName(), session_graph_model_id); // from set, always valid. - TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); +Status DavinciModel::InitTbeHandleWithFfts(const OpDescPtr &op_desc) { + std::vector tbe_kernel; + tbe_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_NAME_THREAD_TBE_KERNEL, tbe_kernel); + GELOGD("Kernel bin ptr vec size is %zu.", tbe_kernel.size()); + if (tbe_kernel.size() != kFftsTbeHandleElementSize) { + REPORT_INNER_ERROR("E19999", "Get tbe_kernel for op:%s(%s) fail, model_id:%u", + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find tvm bin file, size is %zu when ffts", + op_desc->GetName().c_str(), tbe_kernel.size()); + return INTERNAL_ERROR; + } + if (tbe_kernel[0] == nullptr || tbe_kernel[1] == nullptr) { + REPORT_INNER_ERROR("E19999", "Tbe kernel for op:%s is nullptr.", op_desc->GetName().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: tvm bin file of %s is nullptr when ffts.", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + vector bin_file_keys; + (void)AttrUtils::GetListStr(op_desc, kStubFuncName, bin_file_keys); + if (bin_file_keys.size() != kFftsTbeHandleElementSize) { + REPORT_INNER_ERROR("E19999", "Get bin_file for op:%s(%s) fail.", op_desc->GetName().c_str(), + op_desc->GetType().c_str()); + GELOGE(INTERNAL_ERROR, "[Check][Param] TBE: %s can't find bin file keys, size is %zu when ffts", + op_desc->GetName().c_str(), bin_file_keys.size()); + return INTERNAL_ERROR; + } + GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kNonTailBlock], tbe_kernel[kNonTailBlock], true, + kNonTailBlock), + "Function register of first bin file %s failed.", bin_file_keys[kNonTailBlock].c_str()); + GE_CHK_STATUS_RET(FunctionRegister(op_desc, bin_file_keys[kTailBlock], tbe_kernel[kTailBlock], true, kTailBlock), + "Function register of second bin file %s failed.", bin_file_keys[kTailBlock].c_str()); + return SUCCESS; +} +Status DavinciModel::FunctionRegister(const OpDescPtr &op_desc, string &bin_file, OpKernelBinPtr &tbe_kernel, + bool is_ffts, size_t thread_index) { + if (thread_index > 1) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed. Thread index: %zu should less than 1.", thread_index); + return INTERNAL_ERROR; + } + const char *bin_file_key; + if (is_ffts) { + bin_file_key = GetRegisterStub(bin_file, ""); + GELOGI("Node:%s inherit func name:%s directly.", op_desc->GetName().c_str(), bin_file_key); + } else { + std::string session_graph_model_id; + GetUniqueId(op_desc, session_graph_model_id); + bin_file_key = GetRegisterStub(bin_file, session_graph_model_id); // from set, always valid. 
+ } + + TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); std::lock_guard lock(tvm_bin_mutex_); if (rtQueryFunctionRegistered(bin_file_key) != RT_ERROR_NONE) { void *bin_handle = nullptr; @@ -3721,59 +3774,115 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { GELOGD("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key); rtDevBinary_t binary; - std::string json_string; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string), - GELOGD("Get original type of session_graph_id.")); - if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC; - } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICUBE") { - binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICUBE; - } else { - REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", - TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", - TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), - op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); - return PARAM_INVALID; - } - + GE_CHK_STATUS_RET(InitBinaryMagic(op_desc, is_ffts, thread_index, binary), "Init binary magic of %s failed.", + op_desc->GetName().c_str()); binary.version = 0; binary.data = tbe_kernel->GetBinData(); binary.length = tbe_kernel->GetBinDataSize(); - GELOGD("TBE: binary.length: %lu", binary.length); GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); - std::string meta_data; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data), - GELOGI("Get original type of json_string")); - GELOGD("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); - GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); - + GE_CHK_STATUS_RET(InitMetaData(op_desc, is_ffts, thread_index, bin_handle), "Init tvm meta data of %s failed.", + op_desc->GetName().c_str()); kernel_store.StoreTBEHandle(bin_file_key, bin_handle, tbe_kernel); } else { GELOGI("TBE: find the kernel_name[%s] in HandleMap", bin_file_key); kernel_store.ReferTBEHandle(bin_file_key); } - std::string kernel_name; - GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name), - GELOGD("Get original type of kernel_name")); + GE_CHK_STATUS_RET(InitKernelName(op_desc, is_ffts, thread_index, kernel_name), "Init kernel name of %s failed.", + op_desc->GetName().c_str()); GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. return SUCCESS; } - // Kernel registed, Increase used num in store. StoreTbeHandle(bin_file_key); return SUCCESS; } +Status DavinciModel::InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, + rtDevBinary_t &binary) { + string json_string; + const string &tvm_magic = is_ffts ? 
TVM_ATTR_NAME_THREAD_MAGIC : TVM_ATTR_NAME_MAGIC; + const static std::map binary_magics = { + {"RT_DEV_BINARY_MAGIC_ELF_AICPU", RT_DEV_BINARY_MAGIC_ELF_AICPU}, + {"RT_DEV_BINARY_MAGIC_ELF", RT_DEV_BINARY_MAGIC_ELF}, + {"RT_DEV_BINARY_MAGIC_ELF_AIVEC", RT_DEV_BINARY_MAGIC_ELF_AIVEC}, + {"RT_DEV_BINARY_MAGIC_ELF_AICUBE", RT_DEV_BINARY_MAGIC_ELF_AICUBE} + }; + if (is_ffts) { + vector json_list; + (void)AttrUtils::GetListStr(op_desc, tvm_magic, json_list); + if (json_list.size() != kFftsTbeHandleElementSize) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed. Attr is %s, thread index is %zu, json list size is %zu.", + tvm_magic.c_str(), thread_index, json_list.size()); + return INTERNAL_ERROR; + } + json_string = json_list[thread_index]; + } else { + (void)AttrUtils::GetStr(op_desc, tvm_magic, json_string); + } + auto iter = binary_magics.find(json_string); + if (iter == binary_magics.end()) { + REPORT_INNER_ERROR("E19999", "Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", + tvm_magic.c_str(), json_string.c_str(), op_desc->GetName().c_str(), + op_desc->GetType().c_str(), model_id_); + GELOGE(PARAM_INVALID, "[Check][Param] Attr:%s value:%s in op:%s(%s), model_id:%u, check invalid", + TVM_ATTR_NAME_MAGIC.c_str(), json_string.c_str(), + op_desc->GetName().c_str(), op_desc->GetType().c_str(), model_id_); + return PARAM_INVALID; + } + binary.magic = iter->second; + return SUCCESS; +} + +Status DavinciModel::InitMetaData(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, void *bin_handle) { + string meta_data; + const string &tvm_metadata = is_ffts ? TVM_ATTR_NAME_THREAD_METADATA : TVM_ATTR_NAME_METADATA; + if (is_ffts) { + vector meta_data_list; + (void)AttrUtils::GetListStr(op_desc, tvm_metadata, meta_data_list); + if (meta_data_list.size() != kFftsTbeHandleElementSize) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, meta data list size is %zu.", + tvm_metadata.c_str(), thread_index, meta_data_list.size()); + return INTERNAL_ERROR; + } + meta_data = meta_data_list[thread_index]; + } else { + (void)AttrUtils::GetStr(op_desc, tvm_metadata, meta_data); + } + GELOGD("TBE: meta data: %s", meta_data.empty() ? 
"null" : meta_data.c_str()); + if (!meta_data.empty()) { + GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())); + } + return SUCCESS; +} + +Status DavinciModel::InitKernelName(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, string &kernel_name) { + if (is_ffts) { + // delete prefix, eg: *sgt_graph_nodes*/loss_scale/gradient/fp32_vals/Mean_grad/Tile + vector kernel_name_list; + auto pos = op_desc->GetName().find("/"); + if (pos == std::string::npos) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed, subgraph node name: %s.", op_desc->GetName().c_str()); + return INTERNAL_ERROR; + } + string attr_kernel_name = op_desc->GetName().substr(pos + 1) + "_thread_kernelname"; + (void)AttrUtils::GetListStr(op_desc, attr_kernel_name, kernel_name_list); + if (kernel_name_list.size() != kFftsTbeHandleElementSize) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed, attr is %s, thread index is %zu, kernel name list size is %zu.", + attr_kernel_name.c_str(), thread_index, kernel_name_list.size()); + return INTERNAL_ERROR; + } + kernel_name = kernel_name_list[thread_index]; + } else { + string attr_kernel_name = op_desc->GetName() + "_kernelname"; + (void)AttrUtils::GetStr(op_desc, attr_kernel_name, kernel_name); + } + return SUCCESS; +} + void DavinciModel::StoreTbeHandle(const std::string &handle_key) { // Online mode FE may call rtFunctionRegister. TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); diff --git a/ge/graph/load/model_manager/davinci_model.h b/ge/graph/load/model_manager/davinci_model.h index 819a2ea2..4c06ad98 100755 --- a/ge/graph/load/model_manager/davinci_model.h +++ b/ge/graph/load/model_manager/davinci_model.h @@ -771,6 +771,12 @@ class DavinciModel { /// @return Status /// Status InitTbeHandle(const OpDescPtr &op_desc); + Status InitTbeHandleWithFfts(const OpDescPtr &op_desc); + Status FunctionRegister(const OpDescPtr &op_desc, string &bin_file, OpKernelBinPtr &tbe_kernel, bool is_ffts, + size_t thread_index = 0); + Status InitBinaryMagic(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, rtDevBinary_t &binary); + Status InitMetaData(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, void *bin_handle); + Status InitKernelName(const OpDescPtr &op_desc, bool is_ffts, size_t thread_index, string &kernel_name); void StoreTbeHandle(const string &handle_key); void CleanTbeHandle(); diff --git a/ge/graph/load/model_manager/task_info/ffts_task_info.cc b/ge/graph/load/model_manager/task_info/ffts_task_info.cc new file mode 100644 index 00000000..e311ccac --- /dev/null +++ b/ge/graph/load/model_manager/task_info/ffts_task_info.cc @@ -0,0 +1,393 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "graph/load/model_manager/task_info/ffts_task_info.h" + +#include + +#include "graph/load/model_manager/davinci_model.h" + +namespace { +constexpr uint32_t kAddrLen = sizeof(void *); +} +namespace ge { +FftsTaskInfo::~FftsTaskInfo() { + GE_FREE_RT_LOG(args_); +} + +Status FftsTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) { + GELOGI("FftsTaskInfo Init Start."); + GE_CHECK_NOTNULL(davinci_model); + davinci_model_ = davinci_model; + GE_CHK_STATUS_RET_NOLOG(SetStream(task_def.stream_id(), davinci_model_->GetStreamList())); + + const domi::FftsTaskDef &ffts_task_def = task_def.ffts_task(); + OpDescPtr op_desc = davinci_model_->GetOpByIndex(ffts_task_def.op_index()); + GE_CHECK_NOTNULL(op_desc); + + if ((ffts_task_def.sub_task_size() > static_cast(RT_FFTS_MAX_SUB_TASK_NUM)) || + (ffts_task_def.ticket_cache_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_NUM))) { + GELOGE(INTERNAL_ERROR, "[Check][Param] failed. Node: %s, sub task desc size: %d, ticket cache size: %d", + op_desc->GetName().c_str(), ffts_task_def.sub_task_size(), ffts_task_def.ticket_cache_size()); + return INTERNAL_ERROR; + } + args_size_ = kAddrLen * ffts_task_def.addr_size(); + GE_CHK_RT_RET(rtMalloc(&args_, args_size_, RT_MEMORY_HBM)); + InitFftsDescInfo(ffts_task_def.ffts_desc(), sub_task_info_.fftsDesc); + + sub_task_info_.fftsType = static_cast(ffts_task_def.ffts_type()); + sub_task_info_.subTaskNum = ffts_task_def.sub_task_size(); + for (int idx = 0; idx < ffts_task_def.sub_task_size(); ++idx) { + GE_CHK_STATUS_RET_NOLOG(InitSubTaskInfo(ffts_task_def.sub_task(idx), sub_task_info_.subTask[idx])); + } + + sub_task_info_.tickCacheNum = ffts_task_def.ticket_cache_size(); + for (int idx = 0; idx < ffts_task_def.ticket_cache_size(); ++idx) { + GE_CHK_STATUS_RET_NOLOG(InitTicketCache(ffts_task_def.ticket_cache(idx), sub_task_info_.ticketCache[idx])); + } + + size_t data_size = kAddrLen * io_addrs_.size(); + GE_CHK_RT_RET(rtMemcpy(args_, args_size_, io_addrs_.data(), data_size, RT_MEMCPY_HOST_TO_DEVICE)); + GELOGI("FftsTaskInfo::Init Success. 
Node: %s, input/output size: %zu", op_desc->GetName().c_str(), io_addrs_.size()); + return SUCCESS; +} + +void FftsTaskInfo::InitFftsDescInfo(const domi::FftsDescInfoDef &ffts_desc_def, rtFftsDescInfo_t &ffts_desc) { + ffts_desc.tm = static_cast(ffts_desc_def.tm()); + ffts_desc.di = static_cast(ffts_desc_def.di()); + ffts_desc.dw = static_cast(ffts_desc_def.dw()); + ffts_desc.df = static_cast(ffts_desc_def.df()); + ffts_desc.dataSplitUnit = static_cast(ffts_desc_def.data_split_unit()); + ffts_desc.prefetchOstNum = static_cast(ffts_desc_def.prefetch_ost_num()); + ffts_desc.cacheMaintainOstNum = static_cast(ffts_desc_def.cache_maintain_ost_num()); + ffts_desc.aicPrefetchUpper = static_cast(ffts_desc_def.aic_prefetch_upper()); + ffts_desc.aicPrefetchLower = static_cast(ffts_desc_def.aic_prefetch_lower()); + ffts_desc.aivPrefetchUpper = static_cast(ffts_desc_def.aiv_prefetch_upper()); + ffts_desc.aivPrefetchLower = static_cast(ffts_desc_def.aiv_prefetch_lower()); +} + +Status FftsTaskInfo::InitSubTaskInfo(const domi::FftsSubTaskDef &sub_task_def, rtFftsSubTaskInfo_t &sub_task_desc) { + if ((sub_task_def.dst_tick_cache_id_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK)) || + (sub_task_def.src_tick_cache_id_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK))) { + GELOGE(FAILED, "[Check][Param] Invalid FftsSubTaskInfo, dst tick cache id size: %d, src tick cache id size: %d", + sub_task_def.dst_tick_cache_id_size(), sub_task_def.src_tick_cache_id_size()); + return FAILED; + } + + if (sub_task_def.has_auto_thread_aic_aiv() == sub_task_def.has_manual_thread_aic_aiv()) { + GELOGE(FAILED, "[Check][Param] Invalid FftsSubTaskInfo, auto thread aic/aiv: %d, manual thread aic/aiv: %d", + sub_task_def.has_auto_thread_aic_aiv(), sub_task_def.has_manual_thread_aic_aiv()); + return FAILED; + } + + thread_dim_ = sub_task_def.thread_dim(); + GE_CHK_BOOL_RET_STATUS(thread_dim_ != 0, FAILED, "[Get][thread_dim] failed, Invalid thread dim: %u!", thread_dim_); + + sub_task_desc.subTaskType = static_cast(sub_task_def.sub_task_type()); + sub_task_desc.threadDim = sub_task_def.thread_dim(); + + sub_task_desc.dstTickCacheVldBitmap = sub_task_def.dst_tick_cache_vld_bitmap(); + sub_task_desc.srcTickCacheVldBitmap = sub_task_def.src_tick_cache_vld_bitmap(); + sub_task_desc.srcDataOutOfSubGraphBitmap = sub_task_def.src_data_out_of_subgraph_bitmap(); + + for (int idx = 0; idx < sub_task_def.dst_tick_cache_id_size(); ++idx) { + sub_task_desc.dstTickCacheID[idx] = sub_task_def.dst_tick_cache_id(idx); + } + + for (int idx = 0; idx < sub_task_def.src_tick_cache_id_size(); ++idx) { + sub_task_desc.srcTickCacheID[idx] = sub_task_def.src_tick_cache_id(idx); + } + + if (sub_task_def.has_auto_thread_aic_aiv()) { + GE_CHK_STATUS_RET_NOLOG(InitAutoAicAiv(sub_task_def.auto_thread_aic_aiv(), sub_task_desc.custom.autoThreadAicAiv)); + } + + if (sub_task_def.has_manual_thread_aic_aiv()) { + GE_CHK_STATUS_RET_NOLOG( + InitManualAicAiv(sub_task_def.manual_thread_aic_aiv(), sub_task_desc.custom.manualThreadAicAiv)); + } + + if (sub_task_def.has_manual_thread_nop()) { + GE_CHK_STATUS_RET_NOLOG(InitManualNop(sub_task_def.manual_thread_nop(), sub_task_desc.custom.manualThreadNop)); + } + + return SUCCESS; +} + +Status FftsTaskInfo::InitTicketCache(const domi::TicketCacheDef &ticket_cache_def, rtTicketCache_t &ticket_cache) { + if (ticket_cache_def.has_auto_thread_cache() == ticket_cache_def.has_manual_thread_cache()) { + GELOGE(FAILED, "[Check][Param] Invalid TicketCacheDef, has auto thread cache: %d, has manual thread cache: 
%d", + ticket_cache_def.has_auto_thread_cache(), ticket_cache_def.has_manual_thread_cache()); + return FAILED; + } + + ticket_cache.cacheOption = static_cast(ticket_cache_def.cache_option()); + ticket_cache.ticketCacheWindow = ticket_cache_def.ticket_cache_window(); + + if (ticket_cache_def.has_auto_thread_cache()) { + InitAutoCacheInfo(ticket_cache_def.auto_thread_cache(), ticket_cache.custom.autoThreadCache); + } + if (ticket_cache_def.has_manual_thread_cache()) { + GE_CHK_STATUS_RET_NOLOG( + InitManualCacheInfo(ticket_cache_def.manual_thread_cache(), ticket_cache.custom.manualThreadCache)); + } + + return SUCCESS; +} + +// task_addr = {0,200,700,1000,2000, 3500} +// task_addr_offset = {20,40,2,100,200} +template +Status FftsTaskInfo::InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_dim, + uint32_t addr_count) { + for (uint32_t i = 0; i < addr_count; ++i) { + uintptr_t logic_addr = aic_aiv_def.task_addr(i) + thread_dim * aic_aiv_def.task_addr_offset(i); + uint8_t *io_addr = nullptr; + if (ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Check][GetRtAddress]GetRtAddress failed."); + return INTERNAL_ERROR; + } + GELOGD("aic_aiv_def task base addr is %ld, offset is %ld, thread is %d, logic addrs is 0x%lx, io addr is %p", + aic_aiv_def.task_addr(i), aic_aiv_def.task_addr_offset(i), thread_dim, logic_addr, io_addr); + io_addrs_.emplace_back(io_addr); + } + return SUCCESS; +} + +Status FftsTaskInfo::InitAutoAicAiv(const domi::AutoThreadAicAivDef &aic_aiv_def, rtAutoThreadAicAivInfo_t &aic_aiv) { + if (aic_aiv_def.src_prefetch_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK)) { + GELOGE(FAILED, "[Check][Param] Invalid AutoThreadAicAivInfo, prefetch size: %d", aic_aiv_def.src_prefetch_size()); + return FAILED; + } + + aic_aiv.taskParamAddr = reinterpret_cast(args_) + kAddrLen * io_addrs_.size(); + GELOGD("AutoThreadAicAivDef: task param addr is %lu.", aic_aiv.taskParamAddr); + const auto &rts_param = davinci_model_->GetRuntimeParam(); + for (uint32_t i = 0; i < thread_dim_ - 1; ++i) { + GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, aic_aiv_def, i, + static_cast(aic_aiv_def.task_addr_offset_size()))); + } + GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, aic_aiv_def, thread_dim_ - 1, aic_aiv_def.input_output_count())); + int last_thread_workspace_size = aic_aiv_def.task_addr_size() - aic_aiv_def.task_addr_offset_size(); + for (int k = 0; k < last_thread_workspace_size; ++k) { + uintptr_t logic_addr = aic_aiv_def.task_addr(aic_aiv_def.task_addr_offset_size() + k); + uint8_t *io_addr = nullptr; + GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr)); + GELOGD("logic addr is 0x%lx, io addr is %p.", logic_addr, io_addr); + io_addrs_.emplace_back(io_addr); + } + + aic_aiv.taskParamOffset = aic_aiv_def.task_param_offset(); + GELOGD("args_: %p, io_addrs size: %zu, task param offset: %u.", args_, io_addrs_.size(), aic_aiv.taskParamOffset); + aic_aiv.satMode = aic_aiv_def.sat_mode(); + aic_aiv.scheduleMode = aic_aiv_def.schedule_mode(); + aic_aiv.iCachePrefetchCnt = aic_aiv_def.cache_prefetch_cnt(); + + aic_aiv.prefetchEnableBitmap = aic_aiv_def.prefetch_enable_bitmap(); + aic_aiv.prefetchOnceBitmap = aic_aiv_def.prefetch_once_bitmap(); + + aic_aiv.tailBlkDim = aic_aiv_def.tail_blk_dim(); + aic_aiv.nonTailBlkDim = aic_aiv_def.non_tail_blk_dim(); + + aic_aiv.nonTailTaskFuncStub = davinci_model_->GetRegisterStub(aic_aiv_def.non_tail_task_func_stub(), ""); + aic_aiv.tailTaskFuncStub = 
davinci_model_->GetRegisterStub(aic_aiv_def.tail_task_func_stub(), ""); + + GELOGI("Set func name[%s][%s] succ.", aic_aiv.nonTailTaskFuncStub, aic_aiv.tailTaskFuncStub); + for (int idx = 0; idx < aic_aiv_def.src_prefetch_size(); ++idx) { + InitAutoPrefetch(aic_aiv_def.src_prefetch(idx), aic_aiv.srcPrefetch[idx]); + } + + return SUCCESS; +} + +void FftsTaskInfo::InitAutoCacheInfo(const domi::AutoThreadCacheDef &cache_def, rtAutoThreadCacheInfo_t &cache) { + cache.dataAddr = cache_def.data_addr(); + cache.dataAddrOffset = cache_def.data_addr_offset(); + cache.nonTailDataLen = cache_def.non_tail_data_len(); + cache.tailDataLen = cache_def.tail_data_len(); + cache.ticketCacheRefCnt = cache_def.ticket_cache_ref_cnt(); +} + +void FftsTaskInfo::InitAutoPrefetch(const domi::AutoThreadPrefetchDef &prefetch_def, rtAutoThreadPrefetch_t &prefetch) { + prefetch.dataAddr = prefetch_def.data_addr(); + prefetch.dataAddrOffset = prefetch_def.data_addr_offset(); + prefetch.nonTailDataLen = prefetch_def.non_tail_data_len(); + prefetch.tailDataLen = prefetch_def.tail_data_len(); +} + +Status FftsTaskInfo::InitManualAicAiv(const domi::ManualThreadAicAivDef &aic_aiv_def, + rtManualThreadAicAivInfo_t &aic_aiv) { + if ((aic_aiv_def.thread_prefetch_dmu_idx_size() > static_cast(RT_FFTS_MAX_MANUAL_THREAD_NUM)) || + (aic_aiv_def.thread_blk_dim_size() > static_cast(RT_FFTS_MAX_MANUAL_THREAD_NUM)) || + (aic_aiv_def.thread_task_func_stub_size() > static_cast(RT_FFTS_MAX_MANUAL_THREAD_NUM)) || + (aic_aiv_def.src_dep_tbl_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK))) { + GELOGE(FAILED, "[Check][Param] Invalid ManualThreadAicAivInfo, thread prefetch dmu desc size: %d, " + "thread blk dim size: %d, thread task func stub size: %d, src dep tbl size: %d", + aic_aiv_def.thread_prefetch_dmu_idx_size(), aic_aiv_def.thread_blk_dim_size(), + aic_aiv_def.thread_task_func_stub_size(), aic_aiv_def.src_dep_tbl_size()); + return FAILED; + } + aic_aiv.taskParamAddr = reinterpret_cast(args_) + kAddrLen * io_addrs_.size(); + GELOGD("ManualThreadAicAivDef: task param addr is %lu.", aic_aiv.taskParamAddr); + const auto &rts_param = davinci_model_->GetRuntimeParam(); + for (uint32_t i = 0; i < thread_dim_ - 1; ++i) { + GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, aic_aiv_def, i, + static_cast(aic_aiv_def.task_addr_offset_size()))); + } + GE_CHK_STATUS_RET_NOLOG(InitIoAddrs(rts_param, aic_aiv_def, thread_dim_ - 1, aic_aiv_def.input_output_count())); + int last_thread_workspace_size = aic_aiv_def.task_addr_size() - aic_aiv_def.task_addr_offset_size(); + for (int k = 0; k < last_thread_workspace_size; ++k) { + uintptr_t logic_addr = aic_aiv_def.task_addr(aic_aiv_def.task_addr_offset_size() + k); + uint8_t *io_addr = nullptr; + GE_CHK_STATUS_RET_NOLOG(ModelUtils::GetRtAddress(rts_param, logic_addr, io_addr)); + io_addrs_.emplace_back(io_addr); + } + aic_aiv.taskParamOffset = aic_aiv_def.task_param_offset(); + + aic_aiv.satMode = aic_aiv_def.sat_mode(); + aic_aiv.scheduleMode = aic_aiv_def.schedule_mode(); + aic_aiv.iCachePrefetchCnt = aic_aiv_def.cache_prefetch_cnt(); + + aic_aiv.prefetchEnableBitmap = aic_aiv_def.prefetch_enable_bitmap(); // 8 bit bitmap 1 0 1 0 + aic_aiv.prefetchOnceBitmap = aic_aiv_def.prefetch_once_bitmap(); // 8 bit bitmap 1 0 1 0 + aic_aiv.prefetchOnceDmuNum = aic_aiv_def.prefetch_once_dmu_num(); + + for (int idx = 0; idx < aic_aiv_def.thread_prefetch_dmu_idx_size(); ++idx) { + aic_aiv.threadPrefetchDmuIdx[idx] = aic_aiv_def.thread_prefetch_dmu_idx(idx); + } + for (int idx = 0; idx < 
aic_aiv_def.thread_blk_dim_size(); ++idx) { + aic_aiv.threadBlkDim[idx] = aic_aiv_def.thread_blk_dim(idx); + } + for (int idx = 0; idx < aic_aiv_def.thread_task_func_stub_size(); ++idx) { + aic_aiv.threadTaskFuncStub[idx] = aic_aiv_def.thread_task_func_stub(idx).c_str(); + } + + InitManualDmuInfo(aic_aiv_def, aic_aiv.prefetchList); + for (int idx = 0; idx < aic_aiv_def.src_dep_tbl_size(); ++idx) { + GE_CHK_STATUS_RET_NOLOG(InitManualDependency(aic_aiv_def.src_dep_tbl(idx), aic_aiv.srcDepTbl[idx])); + } + + return SUCCESS; +} + +Status FftsTaskInfo::InitManualCacheInfo(const domi::ManualThreadCacheDef &cache_def, + rtManualThreadCacheInfo_t &cache_info) { + if ((cache_def.slice_dmu_idx_size() > static_cast(RT_FFTS_MAX_MANUAL_THREAD_NUM)) || + (cache_def.ticket_cache_ref_cnt_tbl_size() > static_cast(RT_FFTS_MAX_MANUAL_THREAD_NUM))) { + GELOGE(FAILED, "[Check][Param] Invalid ManualThreadCacheInfo slice dum desc index %d, ticket cache ref cnt %d", + cache_def.slice_dmu_idx_size(), cache_def.ticket_cache_ref_cnt_tbl_size()); + return FAILED; + } + + InitManualDmuInfo(cache_def, cache_info.dmuList); + for (int idx = 0; idx < cache_def.slice_dmu_idx_size(); ++idx) { + cache_info.sliceDmuIdx[idx] = cache_def.slice_dmu_idx(idx); + } + + for (int idx = 0; idx < cache_def.ticket_cache_ref_cnt_tbl_size(); ++idx) { + cache_info.ticketCacheRefCntTbl[idx] = cache_def.ticket_cache_ref_cnt_tbl(idx); + } + + return SUCCESS; +} + +Status FftsTaskInfo::InitManualDependency(const domi::ManualThreadDependencyDef &dependency_def, + rtManualThreadDependency_t &dependency) { + if (dependency_def.dependency_size() > static_cast(RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN)) { + GELOGE(FAILED, "[Check][Param] Invalid ManualThreadDependency size: %d", dependency_def.dependency_size()); + return FAILED; + } + + for (int idx = 0; idx < dependency_def.dependency_size(); ++idx) { + dependency.dependency[idx] = dependency_def.dependency(idx); + } + + return SUCCESS; +} + +Status FftsTaskInfo::InitManualNop(const domi::ManualThreadNopDef &nop_def, rtManualThreadNopInfo_t &nop_info) { + if (nop_def.src_dep_tbl_size() > static_cast(RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK)) { + GELOGE(FAILED, "[Check][Param] Invalid ManualThreadNopInfo, src dep tbl size: %d", nop_def.src_dep_tbl_size()); + return FAILED; + } + + for (int idx = 0; idx < nop_def.src_dep_tbl_size(); ++idx) { + GE_CHK_STATUS_RET_NOLOG(InitManualDependency(nop_def.src_dep_tbl(idx), nop_info.srcDepTbl[idx])); + } + + return SUCCESS; +} + +void FftsTaskInfo::InitManualDmuInfo(const domi::ManualThreadAicAivDef &aic_aiv_def, rtManualThreadDmuInfo_t *&dmu) { + if (aic_aiv_def.prefetch_list().empty()) { + return; + } + + std::vector buffer(sizeof(rtManualThreadDmuInfo_t) * aic_aiv_def.prefetch_list_size()); + dmu = reinterpret_cast(buffer.data()); + for (int idx = 0; idx < aic_aiv_def.prefetch_list_size(); ++idx) { + InitManualDmuInfo(aic_aiv_def.prefetch_list(idx), dmu[idx]); + } +} + +void FftsTaskInfo::InitManualDmuInfo(const domi::ManualThreadCacheDef &cache_def, rtManualThreadDmuInfo_t *&dmu) { + if (cache_def.dmu_list().empty()) { + return; + } + + std::vector buffer(sizeof(rtManualThreadDmuInfo_t) * cache_def.dmu_list_size()); + dmu = reinterpret_cast(buffer.data()); + for (int idx = 0; idx < cache_def.dmu_list_size(); ++idx) { + InitManualDmuInfo(cache_def.dmu_list(idx), dmu[idx]); + } +} + +void FftsTaskInfo::InitManualDmuInfo(const domi::ManualThreadDmuDef &dmu_def, rtManualThreadDmuInfo_t &dmu) { + dmu.dataAddr = dmu_def.data_addr(); + dmu.numOuter = dmu_def.num_outer(); + 
+  dmu.numInner = dmu_def.num_inner();
+  dmu.strideOuter = dmu_def.stride_outer();
+  dmu.lenInner = dmu_def.len_inner();
+  dmu.strideInner = dmu_def.stride_inner();
+}
+
+Status FftsTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
+  return SUCCESS;
+}
+
+Status FftsTaskInfo::UpdateArgs() {
+  GE_CHECK_NOTNULL(davinci_model_);
+  std::vector<void *> io_addrs = io_addrs_;
+  davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
+  auto addr_size = kAddrLen * io_addrs.size();
+  GE_CHK_RT_RET(rtMemcpy(args_, args_size_, io_addrs.data(), addr_size, RT_MEMCPY_HOST_TO_DEVICE));
+  return SUCCESS;
+}
+
+Status FftsTaskInfo::Distribute() {
+  GELOGI("FftsTaskInfo Distribute Start.");
+  rtError_t rt_ret = rtFftsTaskLaunch(&sub_task_info_, stream_);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "[Check][RT_ret] Call rtFftsTaskLaunch failed, ret: 0x%X", rt_ret);
+    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  }
+
+  GELOGI("FftsTaskInfo Distribute Success.");
+  return SUCCESS;
+}
+
+REGISTER_TASK_INFO(RT_MODEL_TASK_FFTS_TASK, FftsTaskInfo);
+} // namespace ge
diff --git a/ge/graph/load/model_manager/task_info/ffts_task_info.h b/ge/graph/load/model_manager/task_info/ffts_task_info.h
new file mode 100644
index 00000000..ffc286f9
--- /dev/null
+++ b/ge/graph/load/model_manager/task_info/ffts_task_info.h
@@ -0,0 +1,66 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_TASK_INFO_H_ +#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_TASK_INFO_H_ + +#include "graph/load/model_manager/task_info/task_info.h" +#include "graph/op_desc.h" + +namespace ge { +class FftsTaskInfo : public TaskInfo { + public: + FftsTaskInfo() = default; + ~FftsTaskInfo() override; + + Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; + + Status Distribute() override; + + Status UpdateArgs() override; + + Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override; + + private: + void InitFftsDescInfo(const domi::FftsDescInfoDef &ffts_desc_def, rtFftsDescInfo_t &ffts_desc); + Status InitSubTaskInfo(const domi::FftsSubTaskDef &task_def, rtFftsSubTaskInfo_t &task); + Status InitTicketCache(const domi::TicketCacheDef &cache_def, rtTicketCache_t &cache); + + Status InitAutoAicAiv(const domi::AutoThreadAicAivDef &aic_aiv_def, rtAutoThreadAicAivInfo_t &aic_aiv); + void InitAutoCacheInfo(const domi::AutoThreadCacheDef &cache_def, rtAutoThreadCacheInfo_t &cache); + void InitAutoPrefetch(const domi::AutoThreadPrefetchDef &prefetch_def, rtAutoThreadPrefetch_t &prefetch); + + Status InitManualAicAiv(const domi::ManualThreadAicAivDef &aic_aiv_def, rtManualThreadAicAivInfo_t &aic_aiv); + Status InitManualCacheInfo(const domi::ManualThreadCacheDef &cache_def, rtManualThreadCacheInfo_t &cache); + Status InitManualDependency(const domi::ManualThreadDependencyDef &depend_def, rtManualThreadDependency_t &depend); + Status InitManualNop(const domi::ManualThreadNopDef &nop_def, rtManualThreadNopInfo_t &nop); + + void InitManualDmuInfo(const domi::ManualThreadDmuDef &dmu_def, rtManualThreadDmuInfo_t &dmu); + void InitManualDmuInfo(const domi::ManualThreadCacheDef &cache_def, rtManualThreadDmuInfo_t *&dmu); + void InitManualDmuInfo(const domi::ManualThreadAicAivDef &aic_aiv_def, rtManualThreadDmuInfo_t *&dmu); + + template + Status InitIoAddrs(const RuntimeParam &rts_param, const T &aic_aiv_def, uint32_t thread_dim, uint32_t addr_count); + + DavinciModel *davinci_model_{nullptr}; + rtFftsTaskInfo_t sub_task_info_; + std::vector io_addrs_; + uint32_t thread_dim_{0}; + void *args_{nullptr}; // runtime args memory + uint32_t args_size_{0}; // runtime args memory length +}; +} // namespace ge +#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FFTS_TASK_INFO_H_ diff --git a/ge/graph/partition/graph_partition.cc b/ge/graph/partition/graph_partition.cc index c3f9480d..a810aab0 100755 --- a/ge/graph/partition/graph_partition.cc +++ b/ge/graph/partition/graph_partition.cc @@ -179,6 +179,7 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr GELOGE(ret, "[Merge][SubGraph] Failed, ret:%d", ret); } GE_CHECK_NOTNULL(original_compute_graph); + output_merged_compute_graph->SetName(original_compute_graph->GetName()); // partition sub graph for (const auto &sub_graph : original_compute_graph->GetAllSubgraphs()) { ComputeGraphPtr merged_sub_graph = nullptr; @@ -188,8 +189,16 @@ Status ge::GraphPartitioner::MergeAfterSubGraphOptimization(ge::ComputeGraphPtr GELOGE(ret, "[Merge][SubGraph] Failed, ret:%d", ret); continue; } + // this means subgraph added in optimize subgraph and without partitions, so just add to root graph + if (merged_sub_graph == sub_graph) { + GELOGI("Just add subgraph %s (parent node is %s) to root graph %s.", sub_graph->GetName().c_str(), + sub_graph->GetParentNode()->GetName().c_str(), output_merged_compute_graph->GetName().c_str()); + 
sub_graph->SetParentGraph(sub_graph->GetParentNode()->GetOwnerComputeGraph()); + GE_IF_BOOL_EXEC(output_merged_compute_graph->AddSubgraph(sub_graph->GetName(), merged_sub_graph) != SUCCESS, + return FAILED;) + continue; + } // add sub graph - output_merged_compute_graph->SetName(original_compute_graph->GetName()); merged_sub_graph->SetName(sub_graph->GetName()); merged_sub_graph->SetInputSize(sub_graph->GetInputSize()); merged_sub_graph->SetOutputSize(sub_graph->GetOutputSize()); @@ -245,12 +254,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co } if ((graph_2_graph_partition_info_.find(original_compute_graph) == graph_2_graph_partition_info_.end()) || (graph_2_subgraph_list_.find(original_compute_graph) == graph_2_subgraph_list_.end())) { - REPORT_INNER_ERROR("E19999", "original_compute_graph:%s is not find in graph_2_graph_partition_info_.", - original_compute_graph->GetName().c_str()); - GELOGE(GE_GRAPH_NULL_INPUT, - "[Check][Param] original_compute_graph:%s is not find in graph_2_graph_partition_info_.", - original_compute_graph->GetName().c_str()); - return FAILED; + GELOGW("[GraphPartition]: compute_graph has not found, just return original."); + output_merged_compute_graph = original_compute_graph; + return SUCCESS; } GraphPartitionInfo &subgraph_info = graph_2_graph_partition_info_[original_compute_graph]; const auto &sub_graph_list = graph_2_subgraph_list_[original_compute_graph]; @@ -708,6 +714,7 @@ Status ge::GraphPartitioner::AddPartitionsToGraphNode(vectorGetName()); + (void)sub_graph->SetExtAttr("part_src_graph", compute_graph); GELOGD("set attr success. subgraph(%s) with parent graph(%s)", sub_graph->GetName().c_str(), compute_graph->GetName().c_str()); GE_DUMP(sub_graph, sub_graph->GetName() + "_" + mode_2_str_[graph_info_.mode_]); diff --git a/metadef b/metadef index c6030152..00c0c12e 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit c6030152c6dc05515115765babb5d64fde649df4 +Subproject commit 00c0c12eede6c7bce93a1eda5f0bb437ae80a7ec diff --git a/parser b/parser index 155d3262..3073129b 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit 155d3262ba17f800094abb58b6a809b041cf0a74 +Subproject commit 3073129b68c0fae12a8b7531d60782e39128a28c diff --git a/tests/depends/runtime/src/runtime_stub.cc b/tests/depends/runtime/src/runtime_stub.cc index 2b1af23c..0c9e2c27 100644 --- a/tests/depends/runtime/src/runtime_stub.cc +++ b/tests/depends/runtime/src/runtime_stub.cc @@ -456,6 +456,10 @@ rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void rtError_t rtDebugUnRegisterForStream(rtStream_t stream) { return RT_ERROR_NONE; } + +rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream) { + return RT_ERROR_NONE; +} #ifdef __cplusplus } #endif diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 0d1ae079..8b024820 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -437,6 +437,7 @@ set(DISTINCT_GRAPH_LOAD_SRC_FILES "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/stream_active_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/end_graph_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/model_exit_task_info.cc" + "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/ffts_task_info.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" @@ 
-649,6 +650,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES "graph/load/hccl_task_info_unittest.cc" "graph/load/kernel_ex_task_info_unittest.cc" "graph/load/kernel_task_info_unittest.cc" + "graph/load/ffts_task_info_unittest.cc" "graph/load/memcpy_addr_async_task_info_unittest.cc" "graph/load/memcpy_async_task_info_unittest.cc" "graph/load/cpu_queue_schedule_unittest.cc" diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3f9cc850..ddf241ff 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -1059,4 +1059,144 @@ TEST_F(UtestDavinciModel, get_total_memsize_exclude_zero_copy) { EXPECT_EQ(model.GetTotalMemSizeExcludeZeroCopy(total_useful_size), SUCCESS); EXPECT_EQ(total_useful_size, 512); } + +// test InitTbeHandle +TEST_F(UtestDavinciModel, init_tbe_handle) { + DavinciModel model(0, nullptr); + OpDescPtr op_desc = CreateOpDesc("data", DATA); + model.ge_model_ = make_shared(); + // without kernel + EXPECT_EQ(model.InitTbeHandle(op_desc), INTERNAL_ERROR); + vector buffer; + string key = op_desc->GetName(); + TBEKernelPtr tbe_kernel_ptr = std::make_shared(key, std::move(buffer)); + op_desc->SetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, tbe_kernel_ptr); + string attr_kernel_name = op_desc->GetName() + "_kernelname"; + string kernel_name = "kernel_name"; + AttrUtils::SetStr(op_desc, attr_kernel_name, kernel_name); + EXPECT_EQ(model.InitTbeHandle(op_desc), SUCCESS); + // rtQueryFunctionRegistered(bin_file_key) failed + EXPECT_EQ(model.used_tbe_handle_map_.size(), 0); +} + +// test InitTbeHandleWithFfts +TEST_F(UtestDavinciModel, init_tbe_handle_with_ffts) { + DavinciModel model(0, nullptr); + OpDescPtr op_desc = CreateOpDesc("data", DATA); + model.ge_model_ = make_shared(); + // without tbe_kernel + EXPECT_EQ(model.InitTbeHandleWithFfts(op_desc), INTERNAL_ERROR); + + std::vector tbe_kernel; + vector buffer; + string key = op_desc->GetName(); + OpKernelBinPtr tbe_kernel_ptr0 = std::make_shared(key, std::move(buffer)); + OpKernelBinPtr tbe_kernel_ptr1 = std::make_shared(key, std::move(buffer)); + tbe_kernel.push_back(tbe_kernel_ptr0); + tbe_kernel.push_back(tbe_kernel_ptr1); + op_desc->SetExtAttr(OP_EXTATTR_NAME_THREAD_TBE_KERNEL, tbe_kernel); + // without _register_stub_func + EXPECT_EQ(model.InitTbeHandleWithFfts(op_desc), INTERNAL_ERROR); + + vector bin_file_keys; + bin_file_keys.emplace_back(op_desc->GetName() + "_0"); + bin_file_keys.emplace_back(op_desc->GetName() + "_1"); + AttrUtils::SetListStr(op_desc, "_register_stub_func", bin_file_keys); + + EXPECT_EQ(model.InitTbeHandleWithFfts(op_desc), SUCCESS); + // rtQueryFunctionRegistered(bin_file_key) failed + EXPECT_EQ(model.used_tbe_handle_map_.size(), 0); +} + +// test InitBinaryMagic +TEST_F(UtestDavinciModel, init_binary_magic) { + DavinciModel model(0, nullptr); + rtDevBinary_t binary; + OpDescPtr op_desc = CreateOpDesc("data", DATA); + bool is_ffts = true; + vector json_list; + AttrUtils::SetListStr(op_desc, TVM_ATTR_NAME_THREAD_MAGIC, json_list); + // without tvm_magic + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 0, binary), INTERNAL_ERROR); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF_AICPU"); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF"); + op_desc->DelAttr(TVM_ATTR_NAME_THREAD_MAGIC); + AttrUtils::SetListStr(op_desc, TVM_ATTR_NAME_THREAD_MAGIC, json_list); + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 0, binary), SUCCESS); + EXPECT_EQ(binary.magic, RT_DEV_BINARY_MAGIC_ELF_AICPU); + 
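+  // Slice index 1 selects the second entry of the TVM_ATTR_NAME_THREAD_MAGIC list: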
EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 1, binary), SUCCESS); + EXPECT_EQ(binary.magic, RT_DEV_BINARY_MAGIC_ELF); + + json_list.clear(); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF_AIVEC"); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF_AICUBE"); + op_desc->DelAttr(TVM_ATTR_NAME_THREAD_MAGIC); + AttrUtils::SetListStr(op_desc, TVM_ATTR_NAME_THREAD_MAGIC, json_list); + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 0, binary), SUCCESS); + EXPECT_EQ(binary.magic, RT_DEV_BINARY_MAGIC_ELF_AIVEC); + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 1, binary), SUCCESS); + EXPECT_EQ(binary.magic, RT_DEV_BINARY_MAGIC_ELF_AICUBE); + + // with invalid json type + json_list.clear(); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF_INVALID"); + json_list.emplace_back("RT_DEV_BINARY_MAGIC_ELF_INVALID"); + op_desc->DelAttr(TVM_ATTR_NAME_THREAD_MAGIC); + AttrUtils::SetListStr(op_desc, TVM_ATTR_NAME_THREAD_MAGIC, json_list); + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 0, binary), PARAM_INVALID); + + // test unffts + is_ffts = false; + string json_string = "RT_DEV_BINARY_MAGIC_ELF_AIVEC"; + AttrUtils::SetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string); + EXPECT_EQ(model.InitBinaryMagic(op_desc, is_ffts, 0, binary), SUCCESS); + EXPECT_EQ(binary.magic, RT_DEV_BINARY_MAGIC_ELF_AIVEC); +} + +// test InitMetaData +TEST_F(UtestDavinciModel, init_meta_data) { + DavinciModel model(0, nullptr); + void *bin_handle; + OpDescPtr op_desc = CreateOpDesc("data", DATA); + bool is_ffts = true; + vector meta_data_list; + // with empty meta_data + EXPECT_EQ(model.InitMetaData(op_desc, is_ffts, 0, bin_handle), INTERNAL_ERROR); + meta_data_list.emplace_back("meta_data_0"); + meta_data_list.emplace_back("meta_data_1"); + AttrUtils::SetListStr(op_desc, TVM_ATTR_NAME_THREAD_METADATA, meta_data_list); + EXPECT_EQ(model.InitMetaData(op_desc, is_ffts, 0, bin_handle), SUCCESS); + + is_ffts = false; + string meta_data = "meta_data"; + AttrUtils::SetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data); + EXPECT_EQ(model.InitMetaData(op_desc, is_ffts, 0, bin_handle), SUCCESS); +} + +// test InitKernelName +TEST_F(UtestDavinciModel, init_kernel_name) { + DavinciModel model(0, nullptr); + string kernel_name; + OpDescPtr op_desc = CreateOpDesc("data", DATA); + bool is_ffts = true; + // failed when name is invalid + EXPECT_EQ(model.InitKernelName(op_desc, is_ffts, 0, kernel_name), INTERNAL_ERROR); + OpDescPtr op_desc1 = CreateOpDesc("sgt_graph_nodes/loss_scale", SCALE); + string attr_kernel_name = "loss_scale_thread_kernelname"; + vector kernel_name_list; + AttrUtils::SetListStr(op_desc, attr_kernel_name, kernel_name_list); + // failed without kernel_name + EXPECT_EQ(model.InitKernelName(op_desc, is_ffts, 0, kernel_name), INTERNAL_ERROR); + kernel_name_list.emplace_back("kernel_name_0"); + kernel_name_list.emplace_back("kernel_name_1"); + AttrUtils::SetListStr(op_desc1, attr_kernel_name, kernel_name_list); + EXPECT_EQ(model.InitKernelName(op_desc1, is_ffts, 0, kernel_name), SUCCESS); + + // without ffts + is_ffts = false; + attr_kernel_name = "data_kernelname"; + kernel_name = "kernel_name"; + AttrUtils::SetStr(op_desc, attr_kernel_name, kernel_name); + EXPECT_EQ(model.InitKernelName(op_desc, is_ffts, 0, kernel_name), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/graph/load/ffts_task_info_unittest.cc b/tests/ut/ge/graph/load/ffts_task_info_unittest.cc new file mode 100644 index 00000000..25838f7e --- /dev/null +++ b/tests/ut/ge/graph/load/ffts_task_info_unittest.cc @@ -0,0 +1,212 @@ +/** + * Copyright 2021 
Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#define private public +#define protected public + +#include "graph/load/model_manager/task_info/ffts_task_info.h" +#include "cce/aicpu_engine_struct.h" +#include "common/ge/ge_util.h" +#include "common/properties_manager.h" +#include "framework/common/debug/ge_log.h" +#include "framework/common/fmk_error_codes.h" +#include "graph/attr_value.h" +#include "graph/load/model_manager/davinci_model.h" +#include "graph/load/model_manager/model_manager.h" +#include "runtime/rt_ffts.h" + +namespace ge { +extern OpDescPtr CreateOpDesc(string name, string type); + +class UtestFftsTaskInfo : public testing::Test { +protected: + void SetUp() {} + + void TearDown() {} + +public: + void CreateFftsTaskInfo(DavinciModel &davinci_model, domi::TaskDef &task_def, FftsTaskInfo &ffts_task_info) { + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + davinci_model.stream_list_ = { stream }; + task_def.set_stream_id(0); + + domi::FftsTaskDef *ffts_task_def = task_def.mutable_ffts_task(); + davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); + ffts_task_def->set_op_index(0); + ffts_task_def->set_addr_size(2); + domi::FftsDescInfoDef *ffts_desc = ffts_task_def->mutable_ffts_desc(); + ffts_desc->set_tm(0); + rtFftsTaskInfo_t sub_task_info; + ffts_task_info.sub_task_info_ = sub_task_info; + ffts_task_def->set_ffts_type(RT_FFTS_TYPE_AUTO_THREAD); + } +}; + +// test FftsTaskInfo Init with no subtask and no ticket cache +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_without_subtask) { + DavinciModel davinci_model(0, nullptr); + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + davinci_model.stream_list_ = { stream }; + domi::TaskDef task_def; + task_def.set_stream_id(0); + + domi::FftsTaskDef *ffts_task_def = task_def.mutable_ffts_task(); + FftsTaskInfo ffts_task_info; + // init failed when model without op_desc + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), PARAM_INVALID); + + davinci_model.op_list_[0] = CreateOpDesc("test", PARTITIONEDCALL); + ffts_task_def->set_op_index(0); + ffts_task_def->set_addr_size(2); + domi::FftsDescInfoDef *ffts_desc = ffts_task_def->mutable_ffts_desc(); + ffts_desc->set_tm(0); + rtFftsTaskInfo_t sub_task_info; + ffts_task_info.sub_task_info_ = sub_task_info; + ffts_task_def->set_ffts_type(RT_FFTS_TYPE_AUTO_THREAD); + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Init with subtask and no ticket cache: AutoThreadAicAivDef +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_with_auto_thread_subgraph) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + CreateFftsTaskInfo(davinci_model, task_def, ffts_task_info); + domi::FftsSubTaskDef *ffts_sub_task_def = task_def.mutable_ffts_task()->add_sub_task(); + ffts_sub_task_def->set_thread_dim(static_cast(1)); + 
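+  // Neither auto_thread_aic_aiv nor manual_thread_aic_aiv is set yet, so Init() must fail: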
//sub_task_def.has_auto_thread_aic_aiv() == sub_task_def.has_manual_thread_aic_aiv() + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), FAILED); + + domi::AutoThreadAicAivDef *auto_thread_aic_aiv_def = ffts_sub_task_def->mutable_auto_thread_aic_aiv(); + domi::AutoThreadPrefetchDef *src_prefetch = auto_thread_aic_aiv_def->add_src_prefetch(); + // without InitIoAddrs + ffts_task_info.thread_dim_ = 0; + RuntimeParam runtime_param; + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Init with subtask and no ticket cache: ManualThreadAicAivDef +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_with_manual_thread_subgraph) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + CreateFftsTaskInfo(davinci_model, task_def, ffts_task_info); + domi::FftsSubTaskDef *ffts_sub_task_def = task_def.mutable_ffts_task()->add_sub_task(); + ffts_sub_task_def->set_thread_dim(static_cast(1)); + //sub_task_def.has_auto_thread_aic_aiv() == sub_task_def.has_manual_thread_aic_aiv() + + domi::ManualThreadAicAivDef *manual_thread_aic_aiv_def = ffts_sub_task_def->mutable_manual_thread_aic_aiv(); + manual_thread_aic_aiv_def->add_thread_prefetch_dmu_idx(static_cast(0)); + manual_thread_aic_aiv_def->add_thread_blk_dim(static_cast(0)); + manual_thread_aic_aiv_def->add_thread_task_func_stub("ffts"); + domi::ManualThreadDmuDef *prefetch_list = manual_thread_aic_aiv_def->add_prefetch_list(); + prefetch_list->set_data_addr(static_cast(0)); + // without InitIoAddrs + ffts_task_info.thread_dim_ = 0; + RuntimeParam runtime_param; + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Init with subtask and no ticket cache: ManualThreadNopDef +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_with_manual_thread_nop_subgraph) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + CreateFftsTaskInfo(davinci_model, task_def, ffts_task_info); + + domi::FftsSubTaskDef *ffts_sub_task_def = task_def.mutable_ffts_task()->add_sub_task(); + ffts_sub_task_def->set_thread_dim(static_cast(1)); + domi::AutoThreadAicAivDef *auto_thread_aic_aiv_def = ffts_sub_task_def->mutable_auto_thread_aic_aiv(); + domi::ManualThreadNopDef *manual_thread_nop = ffts_sub_task_def->mutable_manual_thread_nop(); + domi::ManualThreadDependencyDef *src_dep_tbl = manual_thread_nop->add_src_dep_tbl(); + src_dep_tbl->add_dependency(static_cast(0)); + + // without InitIoAddrs + ffts_task_info.thread_dim_ = 0; + RuntimeParam runtime_param; + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Init with no subtask and ticket cache:AutoThreadCacheDef +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_with_auto_thread_ticket_cache) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + CreateFftsTaskInfo(davinci_model, task_def, ffts_task_info); + + domi::TicketCacheDef *ticket_cache_def = task_def.mutable_ffts_task()->add_ticket_cache(); + //ticket_cache_def.has_auto_thread_cache() == ticket_cache_def.has_manual_thread_cache() + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), FAILED); + domi::AutoThreadCacheDef *auto_thread_cache = ticket_cache_def->mutable_auto_thread_cache(); + + 
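+  // Once auto_thread_cache is set, the ticket-cache branch is taken and Init() succeeds: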
ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Init with no subtask and ticket cache:ManualThreadCacheDef +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_with_manual_thread_ticket_cache) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + CreateFftsTaskInfo(davinci_model, task_def, ffts_task_info); + + domi::TicketCacheDef *ticket_cache_def = task_def.mutable_ffts_task()->add_ticket_cache(); + domi::ManualThreadCacheDef *manual_thread_cache = ticket_cache_def->mutable_manual_thread_cache(); + manual_thread_cache->add_slice_dmu_idx(static_cast(0)); + manual_thread_cache->add_ticket_cache_ref_cnt_tbl(static_cast(0)); + domi::ManualThreadDmuDef *dmu_list = manual_thread_cache->add_dmu_list(); + + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.Init(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo UpdateArgs +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_update_args) { + DavinciModel davinci_model(0, nullptr); + FftsTaskInfo ffts_task_info; + ffts_task_info.davinci_model_ = &davinci_model; + ffts_task_info.io_addrs_ = { (void*)0x12345678, (void*)0x22345678 }; + EXPECT_EQ(ffts_task_info.UpdateArgs(), SUCCESS); +} + +// test FftsTaskInfo CalculateArgs +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_calculate_args) { + DavinciModel davinci_model(0, nullptr); + domi::TaskDef task_def; + FftsTaskInfo ffts_task_info; + EXPECT_EQ(ffts_task_info.CalculateArgs(task_def, &davinci_model), SUCCESS); +} + +// test FftsTaskInfo Distribute +TEST_F(UtestFftsTaskInfo, success_ffts_task_info_distribute) { + DavinciModel davinci_model(0, nullptr); + FftsTaskInfo ffts_task_info; + rtFftsTaskInfo_t sub_task_info; + ffts_task_info.sub_task_info_ = sub_task_info; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + ffts_task_info.stream_ = stream; + EXPECT_EQ(ffts_task_info.Distribute(), SUCCESS); +} +} // namespace ge \ No newline at end of file diff --git a/third_party/fwkacllib/inc/runtime/rt.h b/third_party/fwkacllib/inc/runtime/rt.h index 83cafa3c..aa394eea 100644 --- a/third_party/fwkacllib/inc/runtime/rt.h +++ b/third_party/fwkacllib/inc/runtime/rt.h @@ -27,5 +27,6 @@ #include "mem.h" #include "rt_model.h" #include "stream.h" +#include "rt_ffts.h" #endif // __CCE_RUNTIME_RT_H__ diff --git a/third_party/fwkacllib/inc/runtime/rt_ffts.h b/third_party/fwkacllib/inc/runtime/rt_ffts.h new file mode 100755 index 00000000..720da7cd --- /dev/null +++ b/third_party/fwkacllib/inc/runtime/rt_ffts.h @@ -0,0 +1,185 @@ +/* + * Copyright (c) Huawei Technologies Co. , Ltd. 2021. All rights reserved. 
+ * Description: ffts interface + */ + +#ifndef __CCE_RUNTIME_FFTS_H +#define __CCE_RUNTIME_FFTS_H + +#include "base.h" + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +extern "C" { +#endif + +#define RT_FFTS_MAX_SUB_TASK_NUM 32U +#define RT_FFTS_MAX_TICKET_CACHE_NUM 64U +#define RT_FFTS_MAX_MANUAL_THREAD_NUM 16U +#define RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK 8U +#define RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN 32U + +typedef enum tagFftsType { + RT_FFTS_TYPE_AUTO_THREAD = 2, // ffts auto thread mode, same as ffts define + RT_FFTS_TYPE_MANUAL_THREAD = 3, // ffts manual thread mode, same as ffts define +} rtFftsType_t; + +typedef enum tagFftsSubTaskType { + RT_FFTS_SUB_TASK_TYPE_AIC = 0, + RT_FFTS_SUB_TASK_TYPE_AIV = 1, + RT_FFTS_SUB_TASK_TYPE_NOP = 2, + RT_FFTS_SUB_TASK_TYPE_NOTIFY_WAIT = 3, + RT_FFTS_SUB_TASK_TYPE_NOTIFY_RECORD = 4, + RT_FFTS_SUB_TASK_TYPE_WRITE_VALUE = 5, + RT_FFTS_SUB_TASK_TYPE_MIX_AIC = 6, + RT_FFTS_SUB_TASK_TYPE_MIX_AIV = 7, + RT_FFTS_SUB_TASK_TYPE_SDMA = 8, + RT_FFTS_SUB_TASK_TYPE_RESERVED, +} rtFftsSubTaskType_t; + +typedef struct tagManualThreadDmuInfo { + uint64_t dataAddr; // device mem + uint16_t numOuter; + uint16_t numInner; + uint32_t strideOuter; + uint32_t lenInner; + uint32_t strideInner; +} rtManualThreadDmuInfo_t; + +typedef struct tagManualThreadDependency { + uint8_t dependency[RT_FFTS_MANUAL_SRC_DEPEND_TBL_LEN]; +} rtManualThreadDependency_t; + +typedef struct tagManualThreadAicAivInfo { + uint64_t taskParamAddr; // device mem + uint16_t taskParamOffset; + // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + // when satMode=0 and FP16 computation with none INF inputs overflows/underflows + // results will be saturated to +/- MAX of FP16 + uint8_t satMode; + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap 1 0 1 0 + uint8_t prefetchOnceBitmap; // 8 bit bitmap 1 0 1 0 + uint16_t prefetchOnceDmuNum; // prefetch_once_dmu_descriptor_index in ffts + // num: thread0_prefetch_dmu_descriptor_index - prefetch_once_dmu_descriptor_index + uint16_t threadPrefetchDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; // max valid is threadDim + uint16_t threadBlkDim[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + const char *threadTaskFuncStub[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + + rtManualThreadDmuInfo_t *prefetchList; // dmu desc 0-64k, length is the last threadPrefetchDmuIdx[threadDim - 1] + rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtManualThreadAicAivInfo_t; + +typedef struct tagAutoThreadPrefetch { + uint64_t dataAddr; // device mem + uint32_t dataAddrOffset; + uint32_t nonTailDataLen; + uint32_t tailDataLen; +} rtAutoThreadPrefetch_t; + +typedef struct tagAutoThreadAicAivInfo { + uint64_t taskParamAddr; // device mem + uint16_t taskParamOffset; + // when satMode=1 and FP16 computation with none INF inputs overflows/underflows, results will be +/-INF of FP16 + // when satMode=0 and FP16 computation with none INF inputs overflows/underflows + // results will be saturated to +/- MAX of FP16 + uint8_t satMode; + uint8_t scheduleMode; // 0:normal mode, 1:batch mode, 2:sync mode, 3: reserved + uint8_t iCachePrefetchCnt; // units is 2K + uint8_t prefetchEnableBitmap; // 8 bit bitmap + uint8_t prefetchOnceBitmap; // 8 bit bitmap + + uint16_t tailBlkDim; + uint16_t nonTailBlkDim; + + const char *nonTailTaskFuncStub; + const char *tailTaskFuncStub; + + // for prefetch, valid num 
is prefetchEnableBitmap bit count + // if prefetchEnableBitmap = '00010011', need prefetch number is 3, srcPrefetch is only 0, 1, 2 is valid + rtAutoThreadPrefetch_t srcPrefetch[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtAutoThreadAicAivInfo_t; + +typedef struct tagAutoThreadCacheInfo { + uint64_t dataAddr; // device mem + uint32_t dataAddrOffset; + uint32_t nonTailDataLen; + uint32_t tailDataLen; + uint16_t ticketCacheRefCnt; +} rtAutoThreadCacheInfo_t; + +typedef struct tagManualThreadCacheInfo { + rtManualThreadDmuInfo_t *dmuList; // 0-64k + uint16_t dmuNum; + uint16_t sliceDmuIdx[RT_FFTS_MAX_MANUAL_THREAD_NUM]; + uint16_t ticketCacheRefCntTbl[RT_FFTS_MAX_MANUAL_THREAD_NUM]; +} rtManualThreadCacheInfo_t; + +typedef enum tagCacheOp { + RT_CACHE_OP_NONE = 0, + RT_CACHE_OP_FLUSH = 1, + RT_CACHE_OP_INVALIDATE = 2, + RT_CACHE_OP_WRITE_BACK = 3, +} rtCacheOp_t; + +typedef struct tagTicketCache { + rtCacheOp_t cacheOption; + uint8_t ticketCacheWindow; + union { + rtAutoThreadCacheInfo_t autoThreadCache; + rtManualThreadCacheInfo_t manualThreadCache; + } custom; +} rtTicketCache_t; + +typedef struct tagManualThreadNopInfo { + // depend srcTickCacheVldBitmap in rtFftsSubTaskInfo_t + rtManualThreadDependency_t srcDepTbl[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; +} rtManualThreadNopInfo_t; + +typedef struct tagFftsSubTaskInfo { + rtFftsSubTaskType_t subTaskType; + uint16_t threadDim; + uint8_t dstTickCacheVldBitmap; + uint8_t srcTickCacheVldBitmap; + uint8_t srcDataOutOfSubGraphBitmap; + uint8_t dstTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; + uint8_t srcTickCacheID[RT_FFTS_MAX_TICKET_CACHE_PER_SUBTASK]; + union { + rtAutoThreadAicAivInfo_t autoThreadAicAiv; + rtManualThreadAicAivInfo_t manualThreadAicAiv; + rtManualThreadNopInfo_t manualThreadNop; + } custom; +} rtFftsSubTaskInfo_t; + +typedef struct tagFftsDescInfo { + uint8_t tm; // thread subtask kickstart mode, 0:order, 1:disorder + uint8_t di; // discard invalidate + uint8_t dw; // discard write back + uint8_t df; // discard flush + uint8_t dataSplitUnit; // split source or ticket cache by 2~dataSplitUnit MB + uint8_t prefetchOstNum; + uint8_t cacheMaintainOstNum; + uint8_t aicPrefetchUpper; + uint8_t aicPrefetchLower; + uint8_t aivPrefetchUpper; + uint8_t aivPrefetchLower; +} rtFftsDescInfo_t; + +typedef struct tagFftsTaskInfo { + rtFftsType_t fftsType; + uint16_t subTaskNum; + uint16_t tickCacheNum; + rtFftsDescInfo_t fftsDesc; + // sub task desc, real num is subTaskNum + rtFftsSubTaskInfo_t subTask[RT_FFTS_MAX_SUB_TASK_NUM]; + + // ticket cache, real number is ticketCacheNum + rtTicketCache_t ticketCache[RT_FFTS_MAX_TICKET_CACHE_NUM]; +} rtFftsTaskInfo_t; + +RTS_API rtError_t rtFftsTaskLaunch(rtFftsTaskInfo_t *fftsTaskInfo, rtStream_t stream); + +#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) +} +#endif +#endif //__CCE_RUNTIME_FFTS_H diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 6481f655..74539222 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -50,6 +50,7 @@ typedef enum tagModelTaskType { RT_MODEL_TASK_STREAM_LABEL_SWITCH_BY_INDEX, RT_MODEL_TASK_STREAM_LABEL_GOTO, RT_MODEL_TASK_MODEL_EXIT, + RT_MODEL_TASK_FFTS_TASK, RT_MODEL_TASK_ALL_KERNEL, } rtModelTaskType_t; From 6927a8eef3663007b74d3cc6905ff7cf60633d91 Mon Sep 17 00:00:00 2001 From: zhou_chao1993 Date: Wed, 16 Jun 2021 11:15:37 +0800 Subject: [PATCH 24/51] modif dump config --- ge/common/dump/dump_manager.cc | 4 ++-- 
tests/ut/ge/common/dump_manager_unittest.cc | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ge/common/dump/dump_manager.cc b/ge/common/dump/dump_manager.cc index a6944fc6..ebe16fed 100644 --- a/ge/common/dump/dump_manager.cc +++ b/ge/common/dump/dump_manager.cc @@ -33,7 +33,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpManager &DumpManager::GetIn bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump_properties) { if (dump_config.dump_status.empty() && dump_config.dump_debug.empty()) { - dump_properties_map_.emplace(kInferSessionId, dump_properties); + dump_properties_map_[kInferSessionId] = dump_properties; GELOGI("Dump does not open"); return false; } @@ -41,7 +41,7 @@ bool DumpManager::NeedDoDump(const DumpConfig &dump_config, DumpProperties &dump if ((dump_config.dump_status == kDumpoff || dump_config.dump_status == kDumpOFF) && dump_config.dump_debug == kDumpoff) { dump_properties.ClearDumpPropertyValue(); - dump_properties_map_.emplace(kInferSessionId, dump_properties); + dump_properties_map_[kInferSessionId] = dump_properties; return false; } if (dump_config.dump_status == kDumpOn && dump_config.dump_debug == kDumpOn) { diff --git a/tests/ut/ge/common/dump_manager_unittest.cc b/tests/ut/ge/common/dump_manager_unittest.cc index 50eabc4a..7a242997 100644 --- a/tests/ut/ge/common/dump_manager_unittest.cc +++ b/tests/ut/ge/common/dump_manager_unittest.cc @@ -16,6 +16,8 @@ #include +#define protected public +#define private public #include "common/dump/dump_manager.h" #include "common/debug/log.h" #include "common/ge_inner_error_codes.h" @@ -102,4 +104,13 @@ TEST_F(UTEST_dump_manager, is_dump_single_op_close_success) { auto dump = DumpManager::GetInstance().GetDumpProperties(0); DumpManager::GetInstance().RemoveDumpProperties(0); } + + TEST_F(UTEST_dump_manager, not_need_do_dump) { + DumpConfig dump_config; + dump_config.dump_status = "off"; + dump_config.dump_debug = "off"; + DumpProperties dump_properties; + bool ret = DumpManager::GetInstance().NeedDoDump(dump_config, dump_properties); + EXPECT_EQ(ret, false); + } } // namespace ge \ No newline at end of file From 23c8a0d5811f5e808c610ca1f7efa1f9e75d4cd9 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 20:13:41 +0800 Subject: [PATCH 25/51] Fix ut. 
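Make shape-inference skipping explicit for pre-shaped graph inputs:
SubgraphExecutor::InitInputsForUnknownShape() now tags a node with
SetSkipInferShape(true) once the caller-provided tensor desc has been copied
onto it, and ShapeInferenceEngine consults that flag instead of special-casing
DATA/AIPP node types. As a rough sketch (note the predicate lands inverted in
this patch and is corrected by the two follow-up patches), the intended check
is:

    if (!node_state.GetSkipInferShape()) {  // re-infer only unmarked nodes
      GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
                        "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str());
    }

Also renames SubgraphExecutor::ResetContext() to ReleaseContext() and
simplifies CheckHostMem() via NodeUtils::GetInDataNodeByIndex().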
--- ge/hybrid/executor/hybrid_model_executor.cc | 2 +- ge/hybrid/executor/node_state.h | 5 +++++ ge/hybrid/executor/subgraph_executor.cc | 1 + ge/hybrid/executor/subgraph_executor.h | 2 +- ge/hybrid/executor/worker/shape_inference_engine.cc | 2 +- ge/single_op/single_op_model.cc | 8 ++------ tests/ut/ge/single_op/single_op_model_unittest.cc | 13 +++++++------ 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 2abd9cd6..9bf70d26 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -70,7 +70,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { context_.profiler->Dump(std::cout); context_.profiler->Reset(); } - root_graph_executor_->ResetContext(); + root_graph_executor_->ReleaseContext(); context_.iteration += 1; if (ret == END_OF_SEQUENCE) { diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 85f9e4c3..e8ccd416 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -177,6 +177,10 @@ struct NodeState { void SetTaskContext(std::shared_ptr &task_context); std::shared_ptr GetTaskContext(); + void SetSkipInferShape(bool skip_infershape) { skip_infershape_ = skip_infershape; } + + bool GetSkipInferShape() const { return skip_infershape_; } + private: bool IsScheduleReady() const; void SetDataSchedule(const NodeState &node_state, const std::function &ready); @@ -204,6 +208,7 @@ struct NodeState { int merge_index_ = -1; // Use for Execute (Reset after Executed). int switch_index_ = -1; // Use for Schedule (Reset after Prepared). int group_ = -1; + bool skip_infershape_ = false; }; } // namespace hybrid } // namespace ge diff --git a/ge/hybrid/executor/subgraph_executor.cc b/ge/hybrid/executor/subgraph_executor.cc index c26eac9b..6979d05f 100644 --- a/ge/hybrid/executor/subgraph_executor.cc +++ b/ge/hybrid/executor/subgraph_executor.cc @@ -110,6 +110,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vectorSetShape(tensor_desc->GetShape()); output_desc->SetOriginShape(tensor_desc->GetOriginShape()); output_desc->SetDataType(tensor_desc->GetDataType()); + node_state->SetSkipInferShape(true); } } diff --git a/ge/hybrid/executor/subgraph_executor.h b/ge/hybrid/executor/subgraph_executor.h index 35f6e67e..76732c37 100644 --- a/ge/hybrid/executor/subgraph_executor.h +++ b/ge/hybrid/executor/subgraph_executor.h @@ -41,7 +41,7 @@ class SubgraphExecutor { Status PartialExecuteAsync(int task_group); - void ResetContext() { subgraph_context_.reset(nullptr); } + void ReleaseContext() { subgraph_context_.reset(nullptr); } /** * Execute subgraph async, output tensor address(not data) and output tensor descriptions are diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 18fed710..96959b80 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -70,7 +70,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference // Skipping infer shape of input node. 
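+  // skip_infershape_ is set by SubgraphExecutor::InitInputsForUnknownShape() when the
+  // input desc was supplied by the caller, so such nodes need no re-inference here.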
GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); - if (node_state.GetType() != DATA_TYPE && node_state.GetType() != AIPP_DATA_TYPE) { + if (node_state.GetSkipInferShape()) { RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str()); diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 182d1466..90a6362c 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -49,8 +49,8 @@ const uint32_t kOutputIndexOfData = 0; constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; Status CheckHostMem(const std::vector &dependencies, const NodePtr &node, bool &is_host_mem) { + auto op_desc = node->GetOpDesc(); for (const auto &input_name : dependencies) { - auto op_desc = node->GetOpDesc(); int input_index = op_desc->GetInputIndexByName(input_name); if (input_index < 0) { GELOGE(INTERNAL_ERROR, "[Get][InputIndex]failed, node:[%s] inputname: %s.", @@ -60,11 +60,7 @@ Status CheckHostMem(const std::vector &dependencies, const NodePtr &node return INTERNAL_ERROR; } - const auto &in_anchor = node->GetInDataAnchor(input_index); - GE_CHECK_NOTNULL(in_anchor); - const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor(); - GE_CHECK_NOTNULL(peer_out_anchor); - const auto &src_node = peer_out_anchor->GetOwnerNode(); + const auto &src_node = NodeUtils::GetInDataNodeByIndex(*node, input_index); GE_CHECK_NOTNULL(src_node); auto src_op_desc = src_node->GetOpDesc(); GE_CHECK_NOTNULL(src_op_desc); diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc index 63a3eafe..1975f9f4 100644 --- a/tests/ut/ge/single_op/single_op_model_unittest.cc +++ b/tests/ut/ge/single_op/single_op_model_unittest.cc @@ -213,7 +213,7 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { // make graph ut::GraphBuilder builder = ut::GraphBuilder("graph"); - auto data = builder.AddNode("Data", "Data", 0, 1); + auto data = builder.AddNode("Data", "Data", 1, 1); auto transdata = builder.AddNode("Transdata", "Transdata", 1, 1); auto netoutput = builder.AddNode("Netoutput", "NetOutput", 1, 0); builder.AddDataEdge(data, 0, transdata, 0); @@ -228,11 +228,6 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { op_desc->SetOpInferDepends(depend_names); (void)AttrUtils::SetBool(op_desc, kAttrSupportDynamicShape, true); - auto tensor = std::make_shared(); - auto data_desc = data->GetOpDesc(); - auto tensor_desc = data_desc->MutableInputDesc(0); - AttrUtils::SetTensor(tensor_desc, "_value", tensor); - // set task_def auto model_task_def = make_shared(); domi::TaskDef *task_def = model_task_def->add_task(); @@ -249,6 +244,12 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) { op_desc->impl_->input_name_idx_["Data"] = 0; model.BuildDynamicOp(res, dynamic_single_op); + + auto tensor = std::make_shared(); + auto data_desc = data->GetOpDesc(); + auto tensor_desc = data_desc->MutableInputDesc(0); + AttrUtils::SetTensor(tensor_desc, "_value", tensor); + model.BuildDynamicOp(res, dynamic_single_op); } TEST_F(UtestSingleOpModel, test_host_mem) { From b17eafe3dbf2dbf0a0f921d8941445425d2fae26 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 21:52:13 +0800 Subject: [PATCH 26/51] Fix bug. 
--- ge/hybrid/executor/node_state.h | 2 +- ge/hybrid/executor/worker/shape_inference_engine.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index e8ccd416..002e07ab 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -179,7 +179,7 @@ struct NodeState { void SetSkipInferShape(bool skip_infershape) { skip_infershape_ = skip_infershape; } - bool GetSkipInferShape() const { return skip_infershape_; } + bool SkipInferShape() const { return skip_infershape_; } private: bool IsScheduleReady() const; diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 96959b80..753818bc 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -70,7 +70,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference // Skipping infer shape of input node. GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); - if (node_state.GetSkipInferShape()) { + if (!node_state.SkipInferShape()) { RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str()); From 4bc0f6f2af291635ec162cea483c1974b8976d35 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Wed, 16 Jun 2021 22:00:50 +0800 Subject: [PATCH 27/51] Fix bug. --- ge/hybrid/executor/node_state.h | 2 +- ge/hybrid/executor/worker/shape_inference_engine.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h index 002e07ab..b80b60b0 100644 --- a/ge/hybrid/executor/node_state.h +++ b/ge/hybrid/executor/node_state.h @@ -179,7 +179,7 @@ struct NodeState { void SetSkipInferShape(bool skip_infershape) { skip_infershape_ = skip_infershape; } - bool SkipInferShape() const { return skip_infershape_; } + bool MaySkipShapeInference() const { return skip_infershape_; } private: bool IsScheduleReady() const; diff --git a/ge/hybrid/executor/worker/shape_inference_engine.cc b/ge/hybrid/executor/worker/shape_inference_engine.cc index 753818bc..50dc389c 100755 --- a/ge/hybrid/executor/worker/shape_inference_engine.cc +++ b/ge/hybrid/executor/worker/shape_inference_engine.cc @@ -70,7 +70,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { // Do shape inference // Skipping infer shape of input node. 
GELOGD("[%s] Start to invoke InferShapeAndType", node_item.NodeName().c_str()); - if (!node_state.SkipInferShape()) { + if (!node_state.MaySkipShapeInference()) { RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), "[Invoke][InferShapeAndType] for %s failed.", node_item.NodeName().c_str()); From 5bcb04dfb797158bc460fd43a2dc6c4058c41b6b Mon Sep 17 00:00:00 2001 From: wq160 Date: Thu, 17 Jun 2021 09:43:45 +0800 Subject: [PATCH 28/51] update submodule --- metadef | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadef b/metadef index 00c0c12e..8c5fd448 160000 --- a/metadef +++ b/metadef @@ -1 +1 @@ -Subproject commit 00c0c12eede6c7bce93a1eda5f0bb437ae80a7ec +Subproject commit 8c5fd4486f870d8b63213565aa39fdf1ba1e497a From 246d7e4fd8455f5ed5332b434b82d85b67f15358 Mon Sep 17 00:00:00 2001 From: y00500818 Date: Thu, 17 Jun 2021 10:36:12 +0800 Subject: [PATCH 29/51] bugfix for restore context --- ge/generator/ge_generator.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 575afb35..58047c89 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -674,6 +674,12 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr GELOGD("Current ctx is null."); ctx = nullptr; } + std::function callback = [&]() { + if (ctx != nullptr) { + (void)rtCtxSetCurrent(ctx); + } + }; + GE_MAKE_GUARD(restore, callback); GeRootModelPtr ge_root_model = nullptr; GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); @@ -712,11 +718,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr } return ret; } - - if (ctx != nullptr) { - (void)rtCtxSetCurrent(ctx); - } - return SUCCESS; } From 1bed26c72e9b8a7386703b8d698a4d55c379bb3f Mon Sep 17 00:00:00 2001 From: zhangxiaokun Date: Thu, 17 Jun 2021 11:28:14 +0800 Subject: [PATCH 30/51] Remove reduplicated useless proto --- ge/client/proto/ge_api.proto | 1 - ge/client/proto/ge_ir.proto | 193 --- ge/client/proto/insert_op.proto | 140 -- ge/client/proto/om.proto | 396 ----- ge/client/proto/task.proto | 179 --- ge/common/proto/ge_ir.proto | 193 --- ge/common/proto/insert_op.proto | 140 -- ge/common/proto/om.proto | 396 ----- ge/common/proto/op_mapping.proto | 75 - ge/common/proto/task.proto | 179 --- ge/common/proto/tensorflow/attr_value.proto | 70 - ge/common/proto/tensorflow/function.proto | 108 -- ge/common/proto/tensorflow/graph.proto | 64 - ge/common/proto/tensorflow/graph_library.proto | 22 - ge/common/proto/tensorflow/node_def.proto | 71 - ge/common/proto/tensorflow/op_def.proto | 172 -- ge/common/proto/tensorflow/resource_handle.proto | 37 - ge/common/proto/tensorflow/tensor.proto | 102 -- ge/common/proto/tensorflow/tensor_shape.proto | 53 - ge/common/proto/tensorflow/types.proto | 82 - ge/common/proto/tensorflow/versions.proto | 39 - ge/executor/proto/dump_task.proto | 113 -- ge/executor/proto/ge_ir.proto | 193 --- ge/executor/proto/insert_op.proto | 140 -- ge/executor/proto/om.proto | 396 ----- ge/executor/proto/op_mapping.proto | 75 - ge/executor/proto/task.proto | 179 --- ge/ge_local_engine/proto/task.proto | 179 --- ge/offline/proto/ge_ir.proto | 193 --- ge/offline/proto/insert_op.proto | 140 -- ge/offline/proto/om.proto | 396 ----- ge/offline/proto/task.proto | 179 --- ge/proto/caffe/caffe.proto | 1829 ---------------------- 
ge/proto/dump_task.proto | 113 -- ge/proto/fusion_model.proto | 21 - ge/proto/fwk_adapter.proto | 37 - ge/proto/ge_api.proto | 88 -- ge/proto/ge_ir.proto | 193 --- ge/proto/insert_op.proto | 140 -- ge/proto/om.proto | 396 ----- ge/proto/op_mapping.proto | 75 - ge/proto/optimizer_priority.proto | 7 - ge/proto/task.proto | 179 --- ge/proto/tensorflow/attr_value.proto | 70 - ge/proto/tensorflow/function.proto | 108 -- ge/proto/tensorflow/graph.proto | 64 - ge/proto/tensorflow/graph_library.proto | 22 - ge/proto/tensorflow/node_def.proto | 71 - ge/proto/tensorflow/op_def.proto | 172 -- ge/proto/tensorflow/resource_handle.proto | 37 - ge/proto/tensorflow/tensor.proto | 102 -- ge/proto/tensorflow/tensor_shape.proto | 53 - ge/proto/tensorflow/types.proto | 82 - ge/proto/tensorflow/versions.proto | 39 - 54 files changed, 8793 deletions(-) delete mode 100644 ge/client/proto/ge_api.proto delete mode 100644 ge/client/proto/ge_ir.proto delete mode 100644 ge/client/proto/insert_op.proto delete mode 100755 ge/client/proto/om.proto delete mode 100644 ge/client/proto/task.proto delete mode 100644 ge/common/proto/ge_ir.proto delete mode 100644 ge/common/proto/insert_op.proto delete mode 100644 ge/common/proto/om.proto delete mode 100644 ge/common/proto/op_mapping.proto delete mode 100644 ge/common/proto/task.proto delete mode 100644 ge/common/proto/tensorflow/attr_value.proto delete mode 100644 ge/common/proto/tensorflow/function.proto delete mode 100644 ge/common/proto/tensorflow/graph.proto delete mode 100644 ge/common/proto/tensorflow/graph_library.proto delete mode 100644 ge/common/proto/tensorflow/node_def.proto delete mode 100644 ge/common/proto/tensorflow/op_def.proto delete mode 100644 ge/common/proto/tensorflow/resource_handle.proto delete mode 100644 ge/common/proto/tensorflow/tensor.proto delete mode 100644 ge/common/proto/tensorflow/tensor_shape.proto delete mode 100644 ge/common/proto/tensorflow/types.proto delete mode 100644 ge/common/proto/tensorflow/versions.proto delete mode 100644 ge/executor/proto/dump_task.proto delete mode 100644 ge/executor/proto/ge_ir.proto delete mode 100644 ge/executor/proto/insert_op.proto delete mode 100644 ge/executor/proto/om.proto delete mode 100644 ge/executor/proto/op_mapping.proto delete mode 100644 ge/executor/proto/task.proto delete mode 100644 ge/ge_local_engine/proto/task.proto delete mode 100644 ge/offline/proto/ge_ir.proto delete mode 100644 ge/offline/proto/insert_op.proto delete mode 100644 ge/offline/proto/om.proto delete mode 100644 ge/offline/proto/task.proto delete mode 100644 ge/proto/caffe/caffe.proto delete mode 100644 ge/proto/dump_task.proto delete mode 100755 ge/proto/fusion_model.proto delete mode 100644 ge/proto/fwk_adapter.proto delete mode 100755 ge/proto/ge_api.proto delete mode 100644 ge/proto/ge_ir.proto delete mode 100644 ge/proto/insert_op.proto delete mode 100644 ge/proto/om.proto delete mode 100644 ge/proto/op_mapping.proto delete mode 100644 ge/proto/optimizer_priority.proto delete mode 100644 ge/proto/task.proto delete mode 100644 ge/proto/tensorflow/attr_value.proto delete mode 100644 ge/proto/tensorflow/function.proto delete mode 100644 ge/proto/tensorflow/graph.proto delete mode 100644 ge/proto/tensorflow/graph_library.proto delete mode 100644 ge/proto/tensorflow/node_def.proto delete mode 100644 ge/proto/tensorflow/op_def.proto delete mode 100644 ge/proto/tensorflow/resource_handle.proto delete mode 100644 ge/proto/tensorflow/tensor.proto delete mode 100644 ge/proto/tensorflow/tensor_shape.proto delete mode 100644 
ge/proto/tensorflow/types.proto delete mode 100644 ge/proto/tensorflow/versions.proto diff --git a/ge/client/proto/ge_api.proto b/ge/client/proto/ge_api.proto deleted file mode 100644 index 26d705fe..00000000 --- a/ge/client/proto/ge_api.proto +++ /dev/null @@ -1 +0,0 @@ -../../proto/ge_api.proto \ No newline at end of file diff --git a/ge/client/proto/ge_ir.proto b/ge/client/proto/ge_ir.proto deleted file mode 100644 index c0ef3071..00000000 --- a/ge/client/proto/ge_ir.proto +++ /dev/null @@ -1,193 +0,0 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. - DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ - DT_VARIANT = 26; // variant type - DT_BF16 = 27; // bf16 type - DT_INT4 = 28; // int4 type -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; // Used to support attr nesting - TensorDescriptor td = 11; // GeTensorDesc type - TensorDef t = 12; // GeTensor type - GraphDef g = 13; // Graph type - ListListInt list_list_int = 14; // List List Int type - int64 dt = 15; // ge::DataType - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NamedAttrs
-{
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
-// Shape / dimension description, using row-major order
-message ShapeDef
-{
-    repeated int64 dim = 1;  // Size of each dimension
-}
-
-// Multidimensional data description
-message TensorDescriptor
-{
-    string name = 1;     // Optional parameter, tensor name
-
-    DataType dtype = 2;  // tensor datatype
-    ShapeDef shape = 3;  // Shape / dimension
-    string layout = 4;   // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"
-
-    bool has_out_attr = 9;
-    int64 size = 10;
-    int64 weight_size = 11;
-    bool reuse_input = 12;
-    bool output_tensor = 13;
-    string device_type = 14;
-    bool input_tensor = 15;
-    int64 real_dim_cnt = 16;
-    int64 reuse_input_index = 17;
-    int64 data_offset = 18;
-    int64 cmps_size = 19;
-    string cmps_tab = 20;
-    int64 cmps_tab_offset = 21;
-
-    map<string, AttrDef> attr = 5;  // Set of extra parameter fields
-}
-
-// GeTensor definition
-message TensorDef
-{
-    TensorDescriptor desc = 1;  // Tensor description
-    bytes data = 2;             // Tensor data
-}
-
-
-// Operator description
-message OpDef
-{
-    string name = 1;  // name
-    string type = 2;  // type
-
-    repeated string input = 5;  // input original op name + outgoing index. op_name:index
-
-    map<string, AttrDef> attr = 10;  // Set of operator parameter fields
-
-    bool has_out_attr = 20;
-    int64 id = 21;
-    int64 stream_id = 22;
-    repeated string input_name = 23;
-    repeated string src_name = 24;
-    repeated int64 src_index = 25;
-    repeated string dst_name = 26;
-    repeated int64 dst_index = 27;
-    repeated int64 input_i = 28;
-    repeated int64 output_i = 29;
-    repeated int64 workspace = 30;
-    repeated int64 workspace_bytes = 31;
-    repeated bool is_input_const = 32;
-    repeated TensorDescriptor input_desc = 33;
-    repeated TensorDescriptor output_desc = 34;
-    repeated string subgraph_name = 35;
-}
-
-// Graph definition
-message GraphDef
-{
-    string name = 1;  // name
-
-    repeated string input = 4;   // Graph input
-    repeated string output = 5;  // Graph output
-
-    repeated OpDef op = 6;  // List of operators
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
-// model definition
-message ModelDef
-{
-    string name = 1;            // name
-    uint32 version = 2;         // IR Proto version
-    string custom_version = 3;  // User model version number, passed in by user
-
-    repeated GraphDef graph = 7;  // Graph definition; graph[0] represents the main graph of the model
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
diff --git a/ge/client/proto/insert_op.proto b/ge/client/proto/insert_op.proto
deleted file mode 100644
index 7d708865..00000000
--- a/ge/client/proto/insert_op.proto
+++ /dev/null
@@ -1,140 +0,0 @@
-syntax = "proto3";
-
-package domi;
-
-message InsertNewOps {
-    repeated AippOpParams aipp_op = 1;
-    repeated MultiShapeOpParams multi_shape_op = 2;
-}
-
-message AippOpParams {
-    enum InputFormat {
-        UNDEFINED = 0;
-        YUV420SP_U8 = 1;
-        XRGB8888_U8 = 2;
-        RGB888_U8 = 3;
-        YUV400_U8 = 4;
-        NC1HWC0DI_FP16 = 5;
-        NC1HWC0DI_S8 = 6;
-        ARGB8888_U8 = 7;
-        YUYV_U8 = 8;
-        YUV422SP_U8 = 9;
-        AYUV444_U8 = 10;
-        RAW10 = 11;
-        RAW12 = 12;
-        RAW16 = 13;
-        RAW24 = 14;
-        RGB16 = 15;
-        RGB20 = 16;
-        RGB24 = 17;
-        RGB8_IR = 18;
-        RGB16_IR = 19;
-        RGB24_IR = 20;
-    }
-
-    enum AippMode {
-        undefined = 0;
-        static = 1;
-        dynamic = 2;
-    }
-
-    // AIPP mode: distinguishes static AIPP from dynamic AIPP
-    AippMode aipp_mode = 1;
-
-    // related_input_rank is required; integer type; valid range: >= 0 and <= the number of input Data operators;
-    // default 0. It identifies which model input AIPP is applied to: e.g. if the model has two inputs and AIPP
-    // should process the second one, set related_input_rank to 1.
-    uint32 related_input_rank = 2;
-
-    // related_input_name is optional and the top name of data node which inserts aipp
-    string related_input_name = 6;
-
-    // input_edge_idx is optional; integer type; valid range: >= 0.
-    // It applies different AIPP processing to different outputs of the Data operator. If unset, AIPP is applied by
-    // default to all output edges of the model input specified by related_input_rank.
-    // The configured value must be <= the number of output edges of the Data operator.
-    repeated uint32 input_edge_idx = 3;
-
-    // [Begin] dynamic AIPP parameters; ignored when static AIPP is configured
-    uint32 max_src_image_size = 4;
-
-    // Whether rotation is supported. Disabled by default; enabling rotation incurs extra memory and performance cost.
-    bool support_rotation = 5;
-
-    // [End] dynamic AIPP parameters
-
-
-    // [Begin] static AIPP parameters; ignored when dynamic AIPP is configured
-    InputFormat input_format = 51;
-    bool csc_switch = 52;
-    float cpadding_value = 53;
-    bool rbuv_swap_switch = 54;
-    bool ax_swap_switch = 55;
-    bool single_line_mode = 56;
-
-    int32 src_image_size_w = 57;
-    int32 src_image_size_h = 58;
-
-    bool crop = 59;
-    int32 load_start_pos_w = 60;
-    int32 load_start_pos_h = 61;
-    int32 crop_size_w = 62;
-    int32 crop_size_h = 63;
-
-    bool resize = 64;
-    int32 resize_output_w = 65;
-    int32 resize_output_h = 66;
-
-    bool padding = 67;
-    int32 left_padding_size = 68;
-    int32 right_padding_size = 69;
-    int32 top_padding_size = 70;
-    int32 bottom_padding_size = 71;
-    float padding_value = 72;
-
-    int32 mean_chn_0 = 10;
-    int32 mean_chn_1 = 11;
-    int32 mean_chn_2 = 12;
-    int32 mean_chn_3 = 19;
-    float min_chn_0 = 13;
-    float min_chn_1 = 14;
-    float min_chn_2 = 15;
-    float min_chn_3 = 20;
-    repeated float var_reci_chn_0 = 16;
-    repeated float var_reci_chn_1 = 17;
-    repeated float var_reci_chn_2 = 18;
-    repeated float var_reci_chn_3 = 21;
-
-    repeated int32 matrix_r0c0 = 30;
-    repeated int32 matrix_r0c1 = 31;
-    repeated int32 matrix_r0c2 = 32;
-    repeated int32 matrix_r1c0 = 33;
-    repeated int32 matrix_r1c1 = 34;
-    repeated int32 matrix_r1c2 = 35;
-    repeated int32 matrix_r2c0 = 36;
-    repeated int32 matrix_r2c1 = 37;
-    repeated int32 matrix_r2c2 = 38;
-    repeated int32 output_bias_0 = 39;
-    repeated int32 output_bias_1 = 40;
-    repeated int32 output_bias_2 = 41;
-    repeated int32 input_bias_0 = 42;
-    repeated int32 input_bias_1 = 43;
-    repeated int32 input_bias_2 = 44;
-
-    // [End] static AIPP parameters
-
-    // The n number that is used for raw/rgbir data into f16 transformation.
-    // The transformation equation is x/(2^n). If set to 0, no transform is performed.
-    uint32 raw_rgbir_to_f16_n = 45;
-}
-
-message MultiShapeOpParams {
-    enum MultiShapeMode {
-        batch = 0;       // dynamic batch
-        resolution = 1;  // dynamic resolution, reserved for extension
-    }
-
-    MultiShapeMode mode = 1;        // operator mode
-    uint32 related_input_rank = 2;  // which model input the new operator is inserted at
-
-
-    repeated uint32 batch_list = 11;  // batch_list values; the number of entries must be between 2 and 8
-}
diff --git a/ge/client/proto/om.proto b/ge/client/proto/om.proto
deleted file mode 100755
index e15e5f80..00000000
--- a/ge/client/proto/om.proto
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message PoolingOpParams { - int32 mode = 1; - int32 nan_opt = 2; - int32 pad_mode = 3; - bool global_pooling = 4; - - repeated uint32 window = 10; - repeated uint32 pad = 11; - repeated uint32 stride = 12; - 
    bool ceil_mode = 13;
-    int32 data_mode = 14;
-
-    float alpha = 20;
-    float beta = 21;
-    repeated uint32 before_pad = 22;
-};
-
-message EltwiseOpParams {
-    int32 mode = 1;
-    repeated float coeff = 2;
-    float alpha = 3;
-    float beta = 4;
-    repeated WeightDef weight = 5;
-    bool relu_flag = 6;
-};
-
-message ActivationOpParams {
-    int32 mode = 1;
-    float coef = 2;
-    float alpha = 3;
-    float beta = 4;
-};
-
-message BatchNormOpParams {
-    int32 mode = 1;
-
-    float alpha = 2;
-    float beta = 3;
-    double epsilon = 4;                 // optional, [default = 1e-5]
-    bool use_global_stats = 5;          // optional, true by default (testing mode)
-    float moving_average_fraction = 6;  // optional, [default = .999]
-
-    WeightDef estimated_mean = 7;
-    WeightDef estimated_variance = 8;
-
-    WeightDef scale = 9;
-    WeightDef bias = 10;
-};
-
-message ScaleOpParams {
-    WeightDef scale = 1;
-    WeightDef bias = 2;
-};
-
-message ReshapeOpParams {
-    float alpha = 1;
-    float beta = 2;
-    ShapeDef shape = 3;
-    int32 axis = 4;
-    int32 num_axes = 5;
-    int32 format = 6;
-};
-
-message SoftmaxOpParams {
-    int32 algo = 1;
-    int32 mode = 2;
-    float alpha = 3;
-    float beta = 4;
-};
-
-message FullConnectionOpParams {
-    WeightDef filter = 1;
-    WeightDef bias = 2;
-    uint32 num_output = 3;
-    bool relu_flag = 12;
-};
-
-message FlattenOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 start_axis = 3;
-    int32 end_axis = 4;
-}
-
-message AddLimitedOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 axis = 3;
-    bool broadcast = 4;
-
-    repeated WeightDef weight = 10;
-};
-
-message MulLimitedOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 axis = 3;
-    bool broadcast = 4;
-
-    repeated WeightDef weight = 10;
-};
-
-message AddOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message MulOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message SubOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message BiasAddOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    WeightDef bias = 10;
-};
-
-message MatMulOpParams {
-    float alpha = 1;
-    float beta = 2;
-    bool transposeX = 3;
-    bool transposeW = 4;
-
-    WeightDef filter = 10;
-    WeightDef bias = 12;
-};
-
-message RsqrtOpParams {
-    float alpha = 1;
-    float beta = 2;
-};
-
-
-message WeightDef {
-    int32 format = 1;
-    int32 data_type = 2;
-    ShapeDef shape = 3;
-    bytes data = 4;
-    int64 data_offset = 5;
-    uint32 cmps_size = 6;
-    bytes cmps_tab = 7;
-    int64 cmps_tab_offset = 10;
-    CompressInfo cmps_info = 8;
-    AllOffsetQuantizeInfo alloffset_quantize_info = 11;
-}
-
-message ShapeDef {
-    repeated int64 dim = 1;
-}
-
-enum DeviceType {
-    NPU = 0;  // By default, NPU is used.
-    CPU = 1;  // CPU
-}
-
-message AllOffsetQuantizeInfo {
-    float scale = 1;
-    int32 offset = 2;
-}
-
-message TensorDescriptor {
-    int32 format = 1;
-    int32 data_type = 2;
-    repeated int64 dim = 3;
-    uint32 size = 4;
-    bool reuse_input = 5;
-    bool output_tensor = 7;
-    DeviceType device_type = 8;
-    bool input_tensor = 9;
-    uint32 real_dim_cnt = 10;
-    uint32 reuse_input_index = 11;
-    AllOffsetQuantizeInfo alloffset_quantize_info = 12;
-}
-
-message CompressInfo {
-    int32 blockRow = 1;      // block row
-    int32 blockCol = 2;      // block col
-    int32 fractalK = 3;      // fractal K
-    int32 fractalN = 4;      // fractal N
-    int32 lastFractalK = 5;  // K of last fractal
-    int32 lastFractalN = 6;  // N of last fractal
-    int32 cubeSize = 7;      // cube's length
-    int32 loadDir = 8;       // data load direction: 0:col load 1:row load
-}
-
-message AttrDef {
-    message ListValue {
-        repeated string s = 2;                  // "list(string)"
-        repeated int64 i = 3 [packed = true];   // "list(int)"
-        repeated float f = 4 [packed = true];   // "list(float)"
-        repeated bool b = 5 [packed = true];    // "list(bool)"
-        repeated uint32 u = 6 [packed = true];  // "list(uint)"
-        repeated bytes bt = 7;
-    }
-
-    oneof value {
-        string s = 2;          // "string"
-        int64 i = 3;           // "int"
-        float f = 4;           // "float"
-        bool b = 5;            // "bool"
-        uint32 u = 6;          // "uint32"
-        bytes bt = 7;
-        ListValue list = 1;    // any "list(...)"
-        NamedAttrs func = 10;
-    }
-}
-
-// A list of attr names and their values. The whole list is attached
-// with a string name. E.g., MatMul[T=float].
-message NamedAttrs {
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
diff --git a/ge/client/proto/task.proto b/ge/client/proto/task.proto
deleted file mode 100644
index 0da5631e..00000000
--- a/ge/client/proto/task.proto
+++ /dev/null
@@ -1,179 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - 
uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/common/proto/ge_ir.proto b/ge/common/proto/ge_ir.proto deleted file mode 100644 index c0ef3071..00000000 --- a/ge/common/proto/ge_ir.proto +++ /dev/null @@ -1,193 +0,0 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. - DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ - DT_VARIANT = 26; // variant type - DT_BF16 = 27; // bf16 type - DT_INT4 = 28; // int4 type -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; // Used to support attr nesting - TensorDescriptor td = 11; // GeTensorDesc type - TensorDef t = 12; // GeTensor type - GraphDef g = 13; // Graph type - ListListInt list_list_int = 14; // List List Int type - int64 dt = 15; // ge::DataType - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NamedAttrs
-{
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
-// Shape / dimension description, using row-major order
-message ShapeDef
-{
-    repeated int64 dim = 1;  // Size of each dimension
-}
-
-// Multidimensional data description
-message TensorDescriptor
-{
-    string name = 1;     // Optional parameter, tensor name
-
-    DataType dtype = 2;  // tensor datatype
-    ShapeDef shape = 3;  // Shape / dimension
-    string layout = 4;   // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"
-
-    bool has_out_attr = 9;
-    int64 size = 10;
-    int64 weight_size = 11;
-    bool reuse_input = 12;
-    bool output_tensor = 13;
-    string device_type = 14;
-    bool input_tensor = 15;
-    int64 real_dim_cnt = 16;
-    int64 reuse_input_index = 17;
-    int64 data_offset = 18;
-    int64 cmps_size = 19;
-    string cmps_tab = 20;
-    int64 cmps_tab_offset = 21;
-
-    map<string, AttrDef> attr = 5;  // Set of extra parameter fields
-}
-
-// GeTensor definition
-message TensorDef
-{
-    TensorDescriptor desc = 1;  // Tensor description
-    bytes data = 2;             // Tensor data
-}
-
-
-// Operator description
-message OpDef
-{
-    string name = 1;  // name
-    string type = 2;  // type
-
-    repeated string input = 5;  // input original op name + outgoing index. op_name:index
-
-    map<string, AttrDef> attr = 10;  // Set of operator parameter fields
-
-    bool has_out_attr = 20;
-    int64 id = 21;
-    int64 stream_id = 22;
-    repeated string input_name = 23;
-    repeated string src_name = 24;
-    repeated int64 src_index = 25;
-    repeated string dst_name = 26;
-    repeated int64 dst_index = 27;
-    repeated int64 input_i = 28;
-    repeated int64 output_i = 29;
-    repeated int64 workspace = 30;
-    repeated int64 workspace_bytes = 31;
-    repeated bool is_input_const = 32;
-    repeated TensorDescriptor input_desc = 33;
-    repeated TensorDescriptor output_desc = 34;
-    repeated string subgraph_name = 35;
-}
-
-// Graph definition
-message GraphDef
-{
-    string name = 1;  // name
-
-    repeated string input = 4;   // Graph input
-    repeated string output = 5;  // Graph output
-
-    repeated OpDef op = 6;  // List of operators
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
-// model definition
-message ModelDef
-{
-    string name = 1;            // name
-    uint32 version = 2;         // IR Proto version
-    string custom_version = 3;  // User model version number, passed in by user
-
-    repeated GraphDef graph = 7;  // Graph definition; graph[0] represents the main graph of the model
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
diff --git a/ge/common/proto/insert_op.proto b/ge/common/proto/insert_op.proto
deleted file mode 100644
index 7d708865..00000000
--- a/ge/common/proto/insert_op.proto
+++ /dev/null
@@ -1,140 +0,0 @@
-syntax = "proto3";
-
-package domi;
-
-message InsertNewOps {
-    repeated AippOpParams aipp_op = 1;
-    repeated MultiShapeOpParams multi_shape_op = 2;
-}
-
-message AippOpParams {
-    enum InputFormat {
-        UNDEFINED = 0;
-        YUV420SP_U8 = 1;
-        XRGB8888_U8 = 2;
-        RGB888_U8 = 3;
-        YUV400_U8 = 4;
-        NC1HWC0DI_FP16 = 5;
-        NC1HWC0DI_S8 = 6;
-        ARGB8888_U8 = 7;
-        YUYV_U8 = 8;
-        YUV422SP_U8 = 9;
-        AYUV444_U8 = 10;
-        RAW10 = 11;
-        RAW12 = 12;
-        RAW16 = 13;
-        RAW24 = 14;
-        RGB16 = 15;
-        RGB20 = 16;
-        RGB24 = 17;
-        RGB8_IR = 18;
-        RGB16_IR = 19;
-        RGB24_IR = 20;
-    }
-
-    enum AippMode {
-        undefined = 0;
-        static = 1;
-        dynamic = 2;
-    }
-
-    // AIPP mode: distinguishes static AIPP from dynamic AIPP
-    AippMode aipp_mode = 1;
-
-    // related_input_rank is required; integer type; valid range: >= 0 and <= the number of input Data operators;
-    // default 0. It identifies which model input AIPP is applied to: e.g. if the model has two inputs and AIPP
-    // should process the second one, set related_input_rank to 1.
-    uint32 related_input_rank = 2;
-
-    // related_input_name is optional and the top name of data node which inserts aipp
-    string related_input_name = 6;
-
-    // input_edge_idx is optional; integer type; valid range: >= 0.
-    // It applies different AIPP processing to different outputs of the Data operator. If unset, AIPP is applied by
-    // default to all output edges of the model input specified by related_input_rank.
-    // The configured value must be <= the number of output edges of the Data operator.
-    repeated uint32 input_edge_idx = 3;
-
-    // [Begin] dynamic AIPP parameters; ignored when static AIPP is configured
-    uint32 max_src_image_size = 4;
-
-    // Whether rotation is supported. Disabled by default; enabling rotation incurs extra memory and performance cost.
-    bool support_rotation = 5;
-
-    // [End] dynamic AIPP parameters
-
-
-    // [Begin] static AIPP parameters; ignored when dynamic AIPP is configured
-    InputFormat input_format = 51;
-    bool csc_switch = 52;
-    float cpadding_value = 53;
-    bool rbuv_swap_switch = 54;
-    bool ax_swap_switch = 55;
-    bool single_line_mode = 56;
-
-    int32 src_image_size_w = 57;
-    int32 src_image_size_h = 58;
-
-    bool crop = 59;
-    int32 load_start_pos_w = 60;
-    int32 load_start_pos_h = 61;
-    int32 crop_size_w = 62;
-    int32 crop_size_h = 63;
-
-    bool resize = 64;
-    int32 resize_output_w = 65;
-    int32 resize_output_h = 66;
-
-    bool padding = 67;
-    int32 left_padding_size = 68;
-    int32 right_padding_size = 69;
-    int32 top_padding_size = 70;
-    int32 bottom_padding_size = 71;
-    float padding_value = 72;
-
-    int32 mean_chn_0 = 10;
-    int32 mean_chn_1 = 11;
-    int32 mean_chn_2 = 12;
-    int32 mean_chn_3 = 19;
-    float min_chn_0 = 13;
-    float min_chn_1 = 14;
-    float min_chn_2 = 15;
-    float min_chn_3 = 20;
-    repeated float var_reci_chn_0 = 16;
-    repeated float var_reci_chn_1 = 17;
-    repeated float var_reci_chn_2 = 18;
-    repeated float var_reci_chn_3 = 21;
-
-    repeated int32 matrix_r0c0 = 30;
-    repeated int32 matrix_r0c1 = 31;
-    repeated int32 matrix_r0c2 = 32;
-    repeated int32 matrix_r1c0 = 33;
-    repeated int32 matrix_r1c1 = 34;
-    repeated int32 matrix_r1c2 = 35;
-    repeated int32 matrix_r2c0 = 36;
-    repeated int32 matrix_r2c1 = 37;
-    repeated int32 matrix_r2c2 = 38;
-    repeated int32 output_bias_0 = 39;
-    repeated int32 output_bias_1 = 40;
-    repeated int32 output_bias_2 = 41;
-    repeated int32 input_bias_0 = 42;
-    repeated int32 input_bias_1 = 43;
-    repeated int32 input_bias_2 = 44;
-
-    // [End] static AIPP parameters
-
-    // The n number that is used for raw/rgbir data into f16 transformation.
-    // The transformation equation is x/(2^n). If set to 0, no transform is performed.
-    uint32 raw_rgbir_to_f16_n = 45;
-}
-
-message MultiShapeOpParams {
-    enum MultiShapeMode {
-        batch = 0;       // dynamic batch
-        resolution = 1;  // dynamic resolution, reserved for extension
-    }
-
-    MultiShapeMode mode = 1;        // operator mode
-    uint32 related_input_rank = 2;  // which model input the new operator is inserted at
-
-
-    repeated uint32 batch_list = 11;  // batch_list values; the number of entries must be between 2 and 8
-}
diff --git a/ge/common/proto/om.proto b/ge/common/proto/om.proto
deleted file mode 100644
index e15e5f80..00000000
--- a/ge/common/proto/om.proto
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message PoolingOpParams { - int32 mode = 1; - int32 nan_opt = 2; - int32 pad_mode = 3; - bool global_pooling = 4; - - repeated uint32 window = 10; - repeated uint32 pad = 11; - repeated uint32 stride = 12; - 
    bool ceil_mode = 13;
-    int32 data_mode = 14;
-
-    float alpha = 20;
-    float beta = 21;
-    repeated uint32 before_pad = 22;
-};
-
-message EltwiseOpParams {
-    int32 mode = 1;
-    repeated float coeff = 2;
-    float alpha = 3;
-    float beta = 4;
-    repeated WeightDef weight = 5;
-    bool relu_flag = 6;
-};
-
-message ActivationOpParams {
-    int32 mode = 1;
-    float coef = 2;
-    float alpha = 3;
-    float beta = 4;
-};
-
-message BatchNormOpParams {
-    int32 mode = 1;
-
-    float alpha = 2;
-    float beta = 3;
-    double epsilon = 4;                 // optional, [default = 1e-5]
-    bool use_global_stats = 5;          // optional, true by default (testing mode)
-    float moving_average_fraction = 6;  // optional, [default = .999]
-
-    WeightDef estimated_mean = 7;
-    WeightDef estimated_variance = 8;
-
-    WeightDef scale = 9;
-    WeightDef bias = 10;
-};
-
-message ScaleOpParams {
-    WeightDef scale = 1;
-    WeightDef bias = 2;
-};
-
-message ReshapeOpParams {
-    float alpha = 1;
-    float beta = 2;
-    ShapeDef shape = 3;
-    int32 axis = 4;
-    int32 num_axes = 5;
-    int32 format = 6;
-};
-
-message SoftmaxOpParams {
-    int32 algo = 1;
-    int32 mode = 2;
-    float alpha = 3;
-    float beta = 4;
-};
-
-message FullConnectionOpParams {
-    WeightDef filter = 1;
-    WeightDef bias = 2;
-    uint32 num_output = 3;
-    bool relu_flag = 12;
-};
-
-message FlattenOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 start_axis = 3;
-    int32 end_axis = 4;
-}
-
-message AddLimitedOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 axis = 3;
-    bool broadcast = 4;
-
-    repeated WeightDef weight = 10;
-};
-
-message MulLimitedOpParams {
-    float alpha = 1;
-    float beta = 2;
-    int32 axis = 3;
-    bool broadcast = 4;
-
-    repeated WeightDef weight = 10;
-};
-
-message AddOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message MulOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message SubOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    repeated WeightDef weight = 10;
-};
-
-message BiasAddOpParams {
-    float alpha = 1;
-    float beta = 2;
-
-    WeightDef bias = 10;
-};
-
-message MatMulOpParams {
-    float alpha = 1;
-    float beta = 2;
-    bool transposeX = 3;
-    bool transposeW = 4;
-
-    WeightDef filter = 10;
-    WeightDef bias = 12;
-};
-
-message RsqrtOpParams {
-    float alpha = 1;
-    float beta = 2;
-};
-
-
-message WeightDef {
-    int32 format = 1;
-    int32 data_type = 2;
-    ShapeDef shape = 3;
-    bytes data = 4;
-    int64 data_offset = 5;
-    uint32 cmps_size = 6;
-    bytes cmps_tab = 7;
-    int64 cmps_tab_offset = 10;
-    CompressInfo cmps_info = 8;
-    AllOffsetQuantizeInfo alloffset_quantize_info = 11;
-}
-
-message ShapeDef {
-    repeated int64 dim = 1;
-}
-
-enum DeviceType {
-    NPU = 0;  // By default, NPU is used.
-    CPU = 1;  // CPU
-}
-
-message AllOffsetQuantizeInfo {
-    float scale = 1;
-    int32 offset = 2;
-}
-
-message TensorDescriptor {
-    int32 format = 1;
-    int32 data_type = 2;
-    repeated int64 dim = 3;
-    uint32 size = 4;
-    bool reuse_input = 5;
-    bool output_tensor = 7;
-    DeviceType device_type = 8;
-    bool input_tensor = 9;
-    uint32 real_dim_cnt = 10;
-    uint32 reuse_input_index = 11;
-    AllOffsetQuantizeInfo alloffset_quantize_info = 12;
-}
-
-message CompressInfo {
-    int32 blockRow = 1;      // block row
-    int32 blockCol = 2;      // block col
-    int32 fractalK = 3;      // fractal K
-    int32 fractalN = 4;      // fractal N
-    int32 lastFractalK = 5;  // K of last fractal
-    int32 lastFractalN = 6;  // N of last fractal
-    int32 cubeSize = 7;      // cube's length
-    int32 loadDir = 8;       // data load direction: 0:col load 1:row load
-}
-
-message AttrDef {
-    message ListValue {
-        repeated string s = 2;                  // "list(string)"
-        repeated int64 i = 3 [packed = true];   // "list(int)"
-        repeated float f = 4 [packed = true];   // "list(float)"
-        repeated bool b = 5 [packed = true];    // "list(bool)"
-        repeated uint32 u = 6 [packed = true];  // "list(uint)"
-        repeated bytes bt = 7;
-    }
-
-    oneof value {
-        string s = 2;          // "string"
-        int64 i = 3;           // "int"
-        float f = 4;           // "float"
-        bool b = 5;            // "bool"
-        uint32 u = 6;          // "uint32"
-        bytes bt = 7;
-        ListValue list = 1;    // any "list(...)"
-        NamedAttrs func = 10;
-    }
-}
-
-// A list of attr names and their values. The whole list is attached
-// with a string name. E.g., MatMul[T=float].
-message NamedAttrs {
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
diff --git a/ge/common/proto/op_mapping.proto b/ge/common/proto/op_mapping.proto
deleted file mode 100644
index d626eb49..00000000
--- a/ge/common/proto/op_mapping.proto
+++ /dev/null
@@ -1,75 +0,0 @@
-syntax = "proto3";
-package toolkit.aicpu.dump;
-
-message Shape {
-    repeated uint64 dim = 1;
-}
-
-message Output {
-    int32 data_type = 1;
-    int32 format = 2;
-    Shape shape = 3;
-    uint64 address = 4;
-    string original_name = 5;
-    int32 original_output_index = 6;
-    int32 original_output_data_type = 7;
-    int32 original_output_format = 8;
-    uint64 size = 9;
-    Shape origin_shape = 10;
-}
-
-message Input {
-    int32 data_type = 1;
-    int32 format = 2;
-    Shape shape = 3;
-    uint64 address = 4;
-    uint64 size = 5;
-    Shape origin_shape = 6;
-}
-
-enum BufferType {
-    L1 = 0;
-}
-
-message OpBuffer {
-    BufferType buffer_type = 1;
-    uint64 address = 2;
-    uint64 size = 3;
-}
-
-message Op {
-    string op_name = 1;
-    string op_type = 2;
-}
-
-message Task {
-    uint32 task_id = 1;
-    uint32 stream_id = 2;
-    Op op = 3;
-    repeated Output output = 4;
-    bool end_graph = 5;
-    repeated Input input = 6;
-    repeated OpBuffer buffer = 7;
-}
-
-message OpMappingInfo {
-    string dump_path = 1;
-    oneof model_name_param {
-        string model_name = 2;
-    }
-    oneof model_id_param {
-        uint32 model_id = 3;
-    }
-    oneof step_id {
-        uint64 step_id_addr = 4;
-    }
-    oneof iterations_per_loop {
-        uint64 iterations_per_loop_addr = 5;
-    }
-    oneof loop_cond {
-        uint64 loop_cond_addr = 6;
-    }
-    uint32 flag = 7;  // 0x01 load, 0x00 unload
-    repeated Task task = 8;
-    string dump_step = 9;
-}
\ No newline at end of file
diff --git a/ge/common/proto/task.proto b/ge/common/proto/task.proto
deleted file mode 100644
index 0da5631e..00000000
--- a/ge/common/proto/task.proto
+++ /dev/null
@@ -1,179 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 
op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/common/proto/tensorflow/attr_value.proto b/ge/common/proto/tensorflow/attr_value.proto deleted file mode 100644 index 438d7163..00000000 --- a/ge/common/proto/tensorflow/attr_value.proto +++ /dev/null @@ -1,70 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "AttrValueProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; - -// Protocol buffer representing the value for an attr used to configure an Op. -// Comment indicates the corresponding attr type. Only the field matching the -// attr type may be filled. -message AttrValue { - // LINT.IfChange - message ListValue { - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated DataType type = 6 [packed = true]; // "list(type)" - repeated TensorShapeProto shape = 7; // "list(shape)" - repeated TensorProto tensor = 8; // "list(tensor)" - repeated NameAttrList func = 9; // "list(attr)" - } - // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) - - oneof value { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - DataType type = 6; // "type" - TensorShapeProto shape = 7; // "shape" - TensorProto tensor = 8; // "tensor" - ListValue list = 1; // any "list(...)" - - // "func" represents a function. func.name is a function's name or - // a primitive op's name. func.attr.first is the name of an attr - // defined for that function. func.attr.second is the value for - // that attr in the instantiation. - NameAttrList func = 10; - - // This is a placeholder only used in nodes defined inside a - // function. It indicates the attr value will be supplied when - // the function is instantiated. For example, let us suppose a - // node "N" in function "FN". "N" has an attr "A" with value - // placeholder = "foo". When FN is instantiated with attr "foo" - // set to "bar", the instantiated node N's attr A will have been - // given the value "bar". - string placeholder = 9; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NameAttrList { - string name = 1; - map attr = 2; -} diff --git a/ge/common/proto/tensorflow/function.proto b/ge/common/proto/tensorflow/function.proto deleted file mode 100644 index 44681e32..00000000 --- a/ge/common/proto/tensorflow/function.proto +++ /dev/null @@ -1,108 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "FunctionProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "attr_value.proto"; -import "node_def.proto"; -import "op_def.proto"; - -// A library is a set of named functions. -message FunctionDefLibrary { - repeated FunctionDef function = 1; - repeated GradientDef gradient = 2; -} - -// A function can be instantiated when the runtime can bind every attr -// with a value. When a GraphDef has a call to a function, it must -// have binding for every attr defined in the signature. -// * device spec, etc. -message FunctionDef { - // The definition of the function's name, arguments, return values, - // attrs etc. - OpDef signature = 1; - - // Attributes specific to this function definition. - map attr = 5; - - // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. - reserved 2; - - // In both of the following fields, there is the need to specify an - // output that is used as either the input to another node (in - // `node_def`) or as a return value of the function (in `ret`). - // Unlike the NodeDefs in GraphDef, we need to be able to specify a - // list in some cases (instead of just single outputs). Also, we - // need to be able to deal with lists of unknown length (so the - // output index may not be known at function definition time). So - // we use the following format instead: - // * "fun_in" where "fun_in" is the name of a function input arg in - // the `signature` field above. This represents that input, whether - // it is a single tensor or a list. - // * "fun_in:0" gives the first element of a function input arg (a - // non-list input is considered a list of length 1 for these - // purposes). - // * "node:out" where "node" is the name of a node in `node_def` and - // "out" is the name one of its op's output arguments (the name - // comes from the OpDef of the node's op). This represents that - // node's output, whether it is a single tensor or a list. - // Note: We enforce that an op's output arguments are never - // renamed in the backwards-compatibility test. - // * "node:out:0" gives the first element of a node output arg (a - // non-list output is considered a list of length 1 for these - // purposes). - // - // NOT CURRENTLY SUPPORTED (but may be in the future): - // * "node:out:-1" gives last element in a node output list - // * "node:out:1:" gives a list with all but the first element in a - // node output list - // * "node:out::-1" gives a list with all but the last element in a - // node output list - - // The body of the function. 
Unlike the NodeDefs in a GraphDef, attrs - // may have values of type `placeholder` and the `input` field uses - // the "output" format above. - - // By convention, "op" in node_def is resolved by consulting with a - // user-defined library first. If not resolved, "func" is assumed to - // be a builtin op. - repeated NodeDef node_def = 3; - - // A mapping from the output arg names from `signature` to the - // outputs from `node_def` that should be returned by the function. - map ret = 4; -} - -// GradientDef defines the gradient function of a function defined in -// a function library. -// -// A gradient function g (specified by gradient_func) for a function f -// (specified by function_name) must follow the following: -// -// The function 'f' must be a numerical function which takes N inputs -// and produces M outputs. Its gradient function 'g', which is a -// function taking N + M inputs and produces N outputs. -// -// I.e. if we have -// (y1, y2, ..., y_M) = f(x1, x2, ..., x_N), -// then, g is -// (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N, -// dL/dy1, dL/dy2, ..., dL/dy_M), -// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the -// loss function). dL/dx_i is the partial derivative of L with respect -// to x_i. -message GradientDef { - string function_name = 1; // The function name. - string gradient_func = 2; // The gradient function's name. -} diff --git a/ge/common/proto/tensorflow/graph.proto b/ge/common/proto/tensorflow/graph.proto deleted file mode 100644 index 73bfc6ee..00000000 --- a/ge/common/proto/tensorflow/graph.proto +++ /dev/null @@ -1,64 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "GraphProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "node_def.proto"; -import "function.proto"; -import "versions.proto"; - -// Represents the graph of operations -message GraphDef { - repeated NodeDef node = 1; - - // Compatibility versions of the graph. See core/public/version.h for version - // history. The GraphDef version is distinct from the TensorFlow version, and - // each release of TensorFlow will support a range of GraphDef versions. - VersionDef versions = 4; - - // Deprecated single version field; use versions above instead. Since all - // GraphDef changes before "versions" was introduced were forward - // compatible, this field is entirely ignored. - int32 version = 3 [deprecated = true]; - - // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET. - // - // "library" provides user-defined functions. - // - // Naming: - // * library.function.name are in a flat namespace. - // NOTE: We may need to change it to be hierarchical to support - // different orgs. E.g., - // { "/google/nn", { ... }}, - // { "/google/vision", { ... }} - // { "/org_foo/module_bar", { ... }} - // map named_lib; - // * If node[i].op is the name of one function in "library", - // node[i] is deemed as a function call. Otherwise, node[i].op - // must be a primitive operation supported by the runtime. 
- // - // - // Function call semantics: - // - // * The callee may start execution as soon as some of its inputs - // are ready. The caller may want to use Tuple() mechanism to - // ensure all inputs are ready in the same time. - // - // * The consumer of return values may start executing as soon as - // the return values the consumer depends on are ready. The - // consumer may want to use Tuple() mechanism to ensure the - // consumer does not start until all return values of the callee - // function are ready. - FunctionDefLibrary library = 2; -}; diff --git a/ge/common/proto/tensorflow/graph_library.proto b/ge/common/proto/tensorflow/graph_library.proto deleted file mode 100644 index 7bca0838..00000000 --- a/ge/common/proto/tensorflow/graph_library.proto +++ /dev/null @@ -1,22 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; - -import "graph.proto"; - -message GeGraphDef { - string name = 1; - GraphDef graph = 2; -} - -message GraphDefLibrary { - repeated GeGraphDef graph_def = 1; -}; \ No newline at end of file diff --git a/ge/common/proto/tensorflow/node_def.proto b/ge/common/proto/tensorflow/node_def.proto deleted file mode 100644 index 50cf5cac..00000000 --- a/ge/common/proto/tensorflow/node_def.proto +++ /dev/null @@ -1,71 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "NodeProto"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "attr_value.proto"; - -message NodeDef { - // The name given to this operator. Used for naming inputs, - // logging, visualization, etc. Unique within a single GraphDef. - // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*". - string name = 1; - - // The operation name. There may be custom parameters in attrs. - // Op names starting with an underscore are reserved for internal use. - string op = 2; - - // Each input is "node:src_output" with "node" being a string name and - // "src_output" indicating which output tensor to use from "node". If - // "src_output" is 0 the ":0" suffix can be omitted. Regular inputs - // may optionally be followed by control inputs that have the format - // "^node". - repeated string input = 3; - - // A (possibly partial) specification for the device on which this - // node should be placed. 
- // The expected syntax for this string is as follows: - // - // DEVICE_SPEC ::= PARTIAL_SPEC - // - // PARTIAL_SPEC ::= ("/" CONSTRAINT) * - // CONSTRAINT ::= ("job:" JOB_NAME) - // | ("replica:" [1-9][0-9]*) - // | ("task:" [1-9][0-9]*) - // | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") ) - // - // Valid values for this string include: - // * "/job:worker/replica:0/task:1/device:GPU:3" (full specification) - // * "/job:worker/device:GPU:3" (partial specification) - // * "" (no specification) - // - // If the constraints do not resolve to a single device (or if this - // field is empty or not present), the runtime will attempt to - // choose a device automatically. - string device = 4; - - // Operation-specific graph-construction-time configuration. - // Note that this should include all attrs defined in the - // corresponding OpDef, including those with a value matching - // the default -- this allows the default to change and makes - // NodeDefs easier to interpret on their own. However, if - // an attr with a default is not specified in this list, the - // default will be used. - // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and - // one of the names from the corresponding OpDef's attr field). - // The values must have a type matching the corresponding OpDef - // attr's type field. - // Add some examples here showing best practices. - map attr = 5; -}; diff --git a/ge/common/proto/tensorflow/op_def.proto b/ge/common/proto/tensorflow/op_def.proto deleted file mode 100644 index 7f0e8ce2..00000000 --- a/ge/common/proto/tensorflow/op_def.proto +++ /dev/null @@ -1,172 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "OpDefProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "attr_value.proto"; -import "types.proto"; - -// Defines an operation. A NodeDef in a GraphDef specifies an Op by -// using the "op" field which should match the name of a OpDef. -// LINT.IfChange -message OpDef { - // Op names starting with an underscore are reserved for internal use. - // Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*". - string name = 1; - - // For describing inputs and outputs. - message ArgDef { - // Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*". - string name = 1; - - // Human readable description. - string description = 2; - - // Describes the type of one or more tensors that are accepted/produced - // by this input/output arg. The only legal combinations are: - // * For a single tensor: either the "type" field is set or the - // "type_attr" field is set to the name of an attr with type "type". - // * For a sequence of tensors with the same type: the "number_attr" - // field will be set to the name of an attr with type "int", and - // either the "type" or "type_attr" field will be set as for - // single tensors. - // * For a sequence of tensors, the "type_list_attr" field will be set - // to the name of an attr with type "list(type)". 
- DataType type = 3; - string type_attr = 4; // if specified, attr must have type "type" - string number_attr = 5; // if specified, attr must have type "int" - // If specified, attr must have type "list(type)", and none of - // type, type_attr, and number_attr may be specified. - string type_list_attr = 6; - - // For inputs: if true, the inputs are required to be refs. - // By default, inputs can be either refs or non-refs. - // For outputs: if true, outputs are refs, otherwise they are not. - bool is_ref = 16; - }; - - // Description of the input(s). - repeated ArgDef input_arg = 2; - - // Description of the output(s). - repeated ArgDef output_arg = 3; - - // Description of the graph-construction-time configuration of this - // Op. That is to say, this describes the attr fields that will - // be specified in the NodeDef. - message AttrDef { - // A descriptive name for the argument. May be used, e.g. by the - // Python client, as a keyword argument name, and so should match - // the regexp "[a-z][a-z0-9_]+". - string name = 1; - - // One of the type names from attr_value.proto ("string", "list(string)", - // "int", etc.). - string type = 2; - - // A reasonable default for this attribute if the user does not supply - // a value. If not specified, the user must supply a value. - AttrValue default_value = 3; - - // Human-readable description. - string description = 4; - - - // --- Constraints --- - // These constraints are only in effect if specified. Default is no - // constraints. - - // For type == "int", this is a minimum value. For "list(___)" - // types, this is the minimum length. - bool has_minimum = 5; - int64 minimum = 6; - - // The set of allowed values. Has type that is the "list" version - // of the "type" field above (uses the "list" field of AttrValue). - // If type == "type" or "list(type)" above, then the "type" field - // of "allowed_values.list" has the set of allowed DataTypes. - // If type == "string" or "list(string)", then the "s" field of - // "allowed_values.list" has the set of allowed strings. - AttrValue allowed_values = 7; - } - repeated AttrDef attr = 4; - - // Optional deprecation based on GraphDef versions. - OpDeprecation deprecation = 8; - - // One-line human-readable description of what the Op does. - string summary = 5; - - // Additional, longer human-readable description of what the Op does. - string description = 6; - - // ------------------------------------------------------------------------- - // Which optimizations this operation can participate in. - - // True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs) - bool is_commutative = 18; - - // If is_aggregate is true, then this operation accepts N >= 2 - // inputs and produces 1 output all of the same type. Should be - // associative and commutative, and produce output with the same - // shape as the input. The optimizer may replace an aggregate op - // taking input from multiple devices with a tree of aggregate ops - // that aggregate locally within each device (and possibly within - // groups of nearby devices) before communicating. - bool is_aggregate = 16; // for things like add - - // Other optimizations go here, like - // can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc. - - // ------------------------------------------------------------------------- - // Optimization constraints. - - // Ops are marked as stateful if their behavior depends on some state beyond - // their input tensors (e.g. variable reading op) or if they have - // a side-effect (e.g. 
printing or asserting ops). Equivalently, stateless ops - // must always produce the same output for the same input and have - // no side-effects. - // - // By default Ops may be moved between devices. Stateful ops should - // either not be moved, or should only be moved if that state can also - // be moved (e.g. via some sort of save / restore). - // Stateful ops are guaranteed to never be optimized away by Common - // Subexpression Elimination (CSE). - bool is_stateful = 17; // for things like variables, queue - - // ------------------------------------------------------------------------- - // Non-standard options. - - // By default, all inputs to an Op must be initialized Tensors. Ops - // that may initialize tensors for the first time should set this - // field to true, to allow the Op to take an uninitialized Tensor as - // input. - bool allows_uninitialized_input = 19; // for Assign, etc. -}; -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc) - -// Information about version-dependent deprecation of an op -message OpDeprecation { - // First GraphDef version at which the op is disallowed. - int32 version = 1; - - // Explanation of why it was deprecated and what to use instead. - string explanation = 2; -}; - -// A collection of OpDefs -message OpList { - repeated OpDef op = 1; -}; diff --git a/ge/common/proto/tensorflow/resource_handle.proto b/ge/common/proto/tensorflow/resource_handle.proto deleted file mode 100644 index 91c46c9a..00000000 --- a/ge/common/proto/tensorflow/resource_handle.proto +++ /dev/null @@ -1,37 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "ResourceHandle"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// Protocol buffer representing a handle to a tensorflow resource. Handles are -// not valid across executions, but can be serialized back and forth from within -// a single run. -message ResourceHandleProto { - // Unique name for the device containing the resource. - string device = 1; - - // Container in which this resource is placed. - string container = 2; - - // Unique name of this resource. - string name = 3; - - // Hash code for the type of the resource. Is only valid in the same device - // and in the same execution. - uint64 hash_code = 4; - - // For debug-only, the name of the type pointed to by this handle, if - // available. - string maybe_type_name = 5; -}; diff --git a/ge/common/proto/tensorflow/tensor.proto b/ge/common/proto/tensorflow/tensor.proto deleted file mode 100644 index 48eeb6c4..00000000 --- a/ge/common/proto/tensorflow/tensor.proto +++ /dev/null @@ -1,102 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). 
- * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "TensorProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "resource_handle.proto"; -import "tensor_shape.proto"; -import "types.proto"; - -// Protocol buffer representing a tensor. -message TensorProto { - DataType dtype = 1; - - // Shape of the tensor. - TensorShapeProto tensor_shape = 2; - - // Only one of the representations below is set, one of "tensor_contents" and - // the "xxx_val" attributes. We are not using oneof because as oneofs cannot - // contain repeated fields it would require another extra set of messages. - - // Version number. - // - // In version 0, if the "repeated xxx" representations contain only one - // element, that element is repeated to fill the shape. This makes it easy - // to represent a constant Tensor with a single value. - int32 version_number = 3; - - // Serialized raw tensor content from either Tensor::AsProtoTensorContent or - // memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation - // can be used for all tensor types. The purpose of this representation is to - // reduce serialization overhead during RPC call by avoiding serialization of - // many repeated small items. - bytes tensor_content = 4; - - // Type specific representations that make it easy to create tensor protos in - // all languages. Only the representation corresponding to "dtype" can - // be set. The values hold the flattened representation of the tensor in - // row major order. - - // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll - // have some pointless zero padding for each value here. - repeated int32 half_val = 13 [packed = true]; - - // DT_FLOAT. - repeated float float_val = 5 [packed = true]; - - // DT_DOUBLE. - repeated double double_val = 6 [packed = true]; - - // DT_INT32, DT_INT16, DT_INT8, DT_UINT8. - repeated int32 int_val = 7 [packed = true]; - - // DT_STRING - repeated bytes string_val = 8; - - // DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real - // and imaginary parts of i-th single precision complex. - repeated float scomplex_val = 9 [packed = true]; - - // DT_INT64 - repeated int64 int64_val = 10 [packed = true]; - - // DT_BOOL - repeated bool bool_val = 11 [packed = true]; - - // DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real - // and imaginary parts of i-th double precision complex. - repeated double dcomplex_val = 12 [packed = true]; - - // DT_RESOURCE - repeated ResourceHandleProto resource_handle_val = 14; - - // DT_VARIANT - repeated VariantTensorDataProto variant_val = 15; - - // DT_UINT32 - repeated uint32 uint32_val = 16 [packed = true]; - - // DT_UINT64 - repeated uint64 uint64_val = 17 [packed = true]; -}; - -// Protocol buffer representing the serialization format of DT_VARIANT tensors. -message VariantTensorDataProto { - // Name of the type of objects being serialized. - string type_name = 1; - // Portions of the object that are not Tensors. - bytes metadata = 2; - // Tensors contained within objects being serialized. 
- repeated TensorProto tensors = 3; -} diff --git a/ge/common/proto/tensorflow/tensor_shape.proto b/ge/common/proto/tensorflow/tensor_shape.proto deleted file mode 100644 index 3a6d8c5a..00000000 --- a/ge/common/proto/tensorflow/tensor_shape.proto +++ /dev/null @@ -1,53 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -// Protocol buffer representing the shape of tensors. - -syntax = "proto3"; -option cc_enable_arenas = true; -option java_outer_classname = "TensorShapeProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -package domi.tensorflow; - -// Dimensions of a tensor. -message TensorShapeProto { - // One dimension of the tensor. - message Dim { - // Size of the tensor in that dimension. - // This value must be >= -1, but values of -1 are reserved for "unknown" - // shapes (values of -1 mean "unknown" dimension). Certain wrappers - // that work with TensorShapeProto may fail at runtime when deserializing - // a TensorShapeProto containing a dim value of -1. - int64 size = 1; - - // Optional name of the tensor dimension. - string name = 2; - }; - - // Dimensions of the tensor, such as {"input", 30}, {"output", 40} - // for a 30 x 40 2D tensor. If an entry has size -1, this - // corresponds to a dimension of unknown size. The names are - // optional. - // - // The order of entries in "dim" matters: It indicates the layout of the - // values in the tensor in-memory representation. - // - // The first entry in "dim" is the outermost dimension used to layout the - // values, the last entry is the innermost dimension. This matches the - // in-memory layout of RowMajor Eigen tensors. - // - // If "dim.size()" > 0, "unknown_rank" must be false. - repeated Dim dim = 2; - - // If true, the number of dimensions in the shape is unknown. - // - // If true, "dim.size()" must be 0. - bool unknown_rank = 3; -}; diff --git a/ge/common/proto/tensorflow/types.proto b/ge/common/proto/tensorflow/types.proto deleted file mode 100644 index f40e49cb..00000000 --- a/ge/common/proto/tensorflow/types.proto +++ /dev/null @@ -1,82 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "TypesProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// LINT.IfChange -enum DataType { - // Not a legal value for DataType. Used to indicate a DataType field - // has not been set. - DT_INVALID = 0; - - // Data types that all computation devices are expected to be - // capable to support. 
- DT_FLOAT = 1; - DT_DOUBLE = 2; - DT_INT32 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_INT8 = 6; - DT_STRING = 7; - DT_COMPLEX64 = 8; // Single-precision complex - DT_INT64 = 9; - DT_BOOL = 10; - DT_QINT8 = 11; // Quantized int8 - DT_QUINT8 = 12; // Quantized uint8 - DT_QINT32 = 13; // Quantized int32 - DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops. - DT_QINT16 = 15; // Quantized int16 - DT_QUINT16 = 16; // Quantized uint16 - DT_UINT16 = 17; - DT_COMPLEX128 = 18; // Double-precision complex - DT_HALF = 19; - DT_RESOURCE = 20; - DT_VARIANT = 21; // Arbitrary C++ data types - DT_UINT32 = 22; - DT_UINT64 = 23; - - // Do not use! These are only for parameters. Every enum above - // should have a corresponding value below (verified by types_test). - DT_FLOAT_REF = 101; - DT_DOUBLE_REF = 102; - DT_INT32_REF = 103; - DT_UINT8_REF = 104; - DT_INT16_REF = 105; - DT_INT8_REF = 106; - DT_STRING_REF = 107; - DT_COMPLEX64_REF = 108; - DT_INT64_REF = 109; - DT_BOOL_REF = 110; - DT_QINT8_REF = 111; - DT_QUINT8_REF = 112; - DT_QINT32_REF = 113; - DT_BFLOAT16_REF = 114; - DT_QINT16_REF = 115; - DT_QUINT16_REF = 116; - DT_UINT16_REF = 117; - DT_COMPLEX128_REF = 118; - DT_HALF_REF = 119; - DT_RESOURCE_REF = 120; - DT_VARIANT_REF = 121; - DT_UINT32_REF = 122; - DT_UINT64_REF = 123; -} -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/c/c_api.h, -// https://www.tensorflow.org/code/tensorflow/go/tensor.go, -// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc, -// https://www.tensorflow.org/code/tensorflow/core/framework/types.h, -// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc, -// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py, -// https://www.tensorflow.org/code/tensorflow/python/framework/function.py) diff --git a/ge/common/proto/tensorflow/versions.proto b/ge/common/proto/tensorflow/versions.proto deleted file mode 100644 index 4e81548f..00000000 --- a/ge/common/proto/tensorflow/versions.proto +++ /dev/null @@ -1,39 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "VersionsProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// Version information for a piece of serialized data -// -// There are different types of versions for each type of data -// (GraphDef, etc.), but they all have the same common shape -// described here. -// -// Each consumer has "consumer" and "min_producer" versions (specified -// elsewhere). A consumer is allowed to consume this data if -// -// producer >= min_producer -// consumer >= min_consumer -// consumer not in bad_consumers -// -message VersionDef { - // The version of the code that produced this data. - int32 producer = 1; - - // Any consumer below this version is not allowed to consume this data. - int32 min_consumer = 2; - - // Specific consumer versions which are disallowed (e.g. due to bugs). 
- repeated int32 bad_consumers = 3; -}; diff --git a/ge/executor/proto/dump_task.proto b/ge/executor/proto/dump_task.proto deleted file mode 100644 index a2411ddb..00000000 --- a/ge/executor/proto/dump_task.proto +++ /dev/null @@ -1,113 +0,0 @@ -syntax = "proto3"; -package toolkit.dump; - -enum OutputDataType { - DT_UNDEFINED = 0; - DT_FLOAT = 1; - DT_FLOAT16 = 2; - DT_INT8 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_UINT16 = 6; - DT_INT32 = 7; - DT_INT64 = 8; - DT_UINT32 = 9; - DT_UINT64 = 10; - DT_BOOL = 11; - DT_DOUBLE = 12; - DT_STRING = 13; - DT_DUAL_SUB_INT8 = 14; - DT_DUAL_SUB_UINT8 = 15; - DT_COMPLEX64 = 16; - DT_COMPLEX128 = 17; - DT_QINT8 = 18; - DT_QINT16 = 19; - DT_QINT32 = 20; - DT_QUINT8 = 21; - DT_QUINT16 = 22; - DT_RESOURCE = 23; - DT_STRING_REF = 24; - DT_DUAL = 25; - DT_VARIANT = 26; -} - -enum OutputFormat { - FORMAT_NCHW = 0; - FORMAT_NHWC = 1; - FORMAT_ND = 2; - FORMAT_NC1HWC0 = 3; - FORMAT_FRACTAL_Z = 4; - FORMAT_NC1C0HWPAD = 5; - FORMAT_NHWC1C0 = 6; - FORMAT_FSR_NCHW = 7; - FORMAT_FRACTAL_DECONV = 8; - FORMAT_C1HWNC0 = 9; - FORMAT_FRACTAL_DECONV_TRANSPOSE = 10; - FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11; - FORMAT_NC1HWC0_C04 = 12; - FORMAT_FRACTAL_Z_C04 = 13; - FORMAT_CHWN = 14; - FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15; - FORMAT_HWCN = 16; - FORMAT_NC1KHKWHWC0 = 17; - FORMAT_BN_WEIGHT = 18; - FORMAT_FILTER_HWCK = 19; - FORMAT_HASHTABLE_LOOKUP_LOOKUPS=20; - FORMAT_HASHTABLE_LOOKUP_KEYS = 21; - FORMAT_HASHTABLE_LOOKUP_VALUE = 22; - FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23; - FORMAT_HASHTABLE_LOOKUP_HITS=24; - FORMAT_C1HWNCoC0 = 25; - FORMAT_MD = 26; - FORMAT_NDHWC = 27; - FORMAT_FRACTAL_ZZ = 28; - FORMAT_FRACTAL_NZ = 29; - FORMAT_RESERVED = 30; -} - -message OriginalOp { - string name = 1; - uint32 output_index = 2; - OutputDataType data_type = 3; - OutputFormat format = 4; -} - -message Shape { - repeated uint64 dim = 1; -} - -message OpOutput { - OutputDataType data_type = 1; - OutputFormat format = 2; - Shape shape = 3; - OriginalOp original_op = 4; // the original op corresponding to the output - bytes data = 5; - uint64 size = 6; -} - -message OpInput { - OutputDataType data_type = 1; - OutputFormat format = 2; - Shape shape = 3; - bytes data = 4; - uint64 size = 5; -} - -enum BufferType { - L1 = 0; -} - -message OpBuffer { - BufferType buffer_type = 1; - bytes data = 2; - uint64 size = 3; -} - -message DumpData{ - string version = 1; - uint64 dump_time = 2; - repeated OpOutput output = 3; - repeated OpInput input = 4; - repeated OpBuffer buffer = 5; - string op_name = 6; -} diff --git a/ge/executor/proto/ge_ir.proto b/ge/executor/proto/ge_ir.proto deleted file mode 100644 index c0ef3071..00000000 --- a/ge/executor/proto/ge_ir.proto +++ /dev/null @@ -1,193 +0,0 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. 
- DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ - DT_VARIANT = 26; // variant type - DT_BF16 = 27; // bf16 type - DT_INT4 = 28; // int4 type -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; // Used to support attr nesting - TensorDescriptor td = 11; // GeTensorDesc type - TensorDef t = 12; // GeTensor type - GraphDef g = 13; // Graph type - ListListInt list_list_int = 14; // List List Int type - int64 dt = 15; // ge::DataType - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NamedAttrs
-{
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
-// Shape / dimension description, using row-major order
-message ShapeDef
-{
-    repeated int64 dim = 1;  // Size of each dimension
-}
-
-// Multidimensional data description
-message TensorDescriptor
-{
-    string name = 1;     // Optional parameter, tensor name
-
-    DataType dtype = 2;  // tensor datatype
-    ShapeDef shape = 3;  // Shape / dimension
-    string layout = 4;   // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"
-
-    bool has_out_attr = 9;
-    int64 size = 10;
-    int64 weight_size = 11;
-    bool reuse_input = 12;
-    bool output_tensor = 13;
-    string device_type = 14;
-    bool input_tensor = 15;
-    int64 real_dim_cnt = 16;
-    int64 reuse_input_index = 17;
-    int64 data_offset = 18;
-    int64 cmps_size = 19;
-    string cmps_tab = 20;
-    int64 cmps_tab_offset = 21;
-
-    map<string, AttrDef> attr = 5;  // Set of extra parameter fields
-}
-
-// GeTensor definition
-message TensorDef
-{
-    TensorDescriptor desc = 1;  // Tensor description
-    bytes data = 2;             // Tensor data
-}
-
-
-// Operator description
-message OpDef
-{
-    string name = 1;  // name
-    string type = 2;  // type
-
-    repeated string input = 5;  // input original op name + outgoing index. op_name:index
-
-    map<string, AttrDef> attr = 10;  // Set of operator parameter fields
-
-    bool has_out_attr = 20;
-    int64 id = 21;
-    int64 stream_id = 22;
-    repeated string input_name = 23;
-    repeated string src_name = 24;
-    repeated int64 src_index = 25;
-    repeated string dst_name = 26;
-    repeated int64 dst_index = 27;
-    repeated int64 input_i = 28;
-    repeated int64 output_i = 29;
-    repeated int64 workspace = 30;
-    repeated int64 workspace_bytes = 31;
-    repeated bool is_input_const = 32;
-    repeated TensorDescriptor input_desc = 33;
-    repeated TensorDescriptor output_desc = 34;
-    repeated string subgraph_name = 35;
-}
-
-// Graph definition
-message GraphDef
-{
-    string name = 1;  // name
-
-    repeated string input = 4;   // Graph input
-    repeated string output = 5;  // Graph output
-
-    repeated OpDef op = 6;  // List of operators
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
-// model definition
-message ModelDef
-{
-    string name = 1;            // name
-    uint32 version = 2;         // IR proto version
-    string custom_version = 3;  // User model version number, passed in by user
-
-    repeated GraphDef graph = 7;  // Graph definition; graph[0] is the main graph of the model
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
diff --git a/ge/executor/proto/insert_op.proto b/ge/executor/proto/insert_op.proto
deleted file mode 100644
index 7d708865..00000000
--- a/ge/executor/proto/insert_op.proto
+++ /dev/null
@@ -1,140 +0,0 @@
-syntax = "proto3";
-
-package domi;
-
-message InsertNewOps {
-  repeated AippOpParams aipp_op = 1;
-  repeated MultiShapeOpParams multi_shape_op = 2;
-}
-
-message AippOpParams {
-  enum InputFormat {
-    UNDEFINED = 0;
-    YUV420SP_U8 = 1;
-    XRGB8888_U8 = 2;
-    RGB888_U8 = 3;
-    YUV400_U8 = 4;
-    NC1HWC0DI_FP16 = 5;
-    NC1HWC0DI_S8 = 6;
-    ARGB8888_U8 = 7;
-    YUYV_U8 = 8;
-    YUV422SP_U8 = 9;
-    AYUV444_U8 = 10;
-    RAW10 = 11;
-    RAW12 = 12;
-    RAW16 = 13;
-    RAW24 = 14;
-    RGB16 = 15;
-    RGB20 = 16;
-    RGB24 = 17;
-    RGB8_IR = 18;
-    RGB16_IR = 19;
-    RGB24_IR = 20;
-  }
-
-  enum AippMode {
-    undefined = 0;
-    static = 1;
-    dynamic = 2;
-  }
-
-  // AIPP mode: distinguishes static AIPP from dynamic AIPP
-  AippMode aipp_mode = 1;
-
-  // related_input_rank is required; integer; valid range is >= 0 and
-  // <= the number of input Data operators; default is 0. It selects which
-  // model input is processed by AIPP: e.g. if the model has two inputs and
-  // AIPP is needed on the second one, set related_input_rank to 1.
-  uint32 related_input_rank = 2;
-
-  // related_input_name is optional and is the top name of the data node which inserts aipp
-  string related_input_name = 6;
-
-  // input_edge_idx is optional; integer; valid range is >= 0.
-  // It allows different AIPP processing for different outputs of the Data
-  // operator. If unset, AIPP is applied to all output edges of the model
-  // input selected by related_input_rank. Each configured value must be
-  // <= the number of output edges of the Data operator.
-  repeated uint32 input_edge_idx = 3;
-
-  // [Begin] dynamic AIPP parameters; ignored when static AIPP is configured
-  uint32 max_src_image_size = 4;
-
-  // Whether rotation is supported. Disabled by default; enabling rotation
-  // costs extra memory and performance.
-  bool support_rotation = 5;
-
-  // [End] dynamic AIPP parameters
-
-
-  // [Begin] static AIPP parameters; ignored when dynamic AIPP is configured
-  InputFormat input_format = 51;
-  bool csc_switch = 52;
-  float cpadding_value = 53;
-  bool rbuv_swap_switch = 54;
-  bool ax_swap_switch = 55;
-  bool single_line_mode = 56;
-
-  int32 src_image_size_w = 57;
-  int32 src_image_size_h = 58;
-
-  bool crop = 59;
-  int32 load_start_pos_w = 60;
-  int32 load_start_pos_h = 61;
-  int32 crop_size_w = 62;
-  int32 crop_size_h = 63;
-
-  bool resize = 64;
-  int32 resize_output_w = 65;
-  int32 resize_output_h = 66;
-
-  bool padding = 67;
-  int32 left_padding_size = 68;
-  int32 right_padding_size = 69;
-  int32 top_padding_size = 70;
-  int32 bottom_padding_size = 71;
-  float padding_value = 72;
-
-  int32 mean_chn_0 = 10;
-  int32 mean_chn_1 = 11;
-  int32 mean_chn_2 = 12;
-  int32 mean_chn_3 = 19;
-  float min_chn_0 = 13;
-  float min_chn_1 = 14;
-  float min_chn_2 = 15;
-  float min_chn_3 = 20;
-  repeated float var_reci_chn_0 = 16;
-  repeated float var_reci_chn_1 = 17;
-  repeated float var_reci_chn_2 = 18;
-  repeated float var_reci_chn_3 = 21;
-
-  repeated int32 matrix_r0c0 = 30;
-  repeated int32 matrix_r0c1 = 31;
-  repeated int32 matrix_r0c2 = 32;
-  repeated int32 matrix_r1c0 = 33;
-  repeated int32 matrix_r1c1 = 34;
-  repeated int32 matrix_r1c2 = 35;
-  repeated int32 matrix_r2c0 = 36;
-  repeated int32 matrix_r2c1 = 37;
-  repeated int32 matrix_r2c2 = 38;
-  repeated int32 output_bias_0 = 39;
-  repeated int32 output_bias_1 = 40;
-  repeated int32 output_bias_2 = 41;
-  repeated int32 input_bias_0 = 42;
-  repeated int32 input_bias_1 = 43;
-  repeated int32 input_bias_2 = 44;
-
-  // [End] static AIPP parameters
-
-  // The exponent n used when transforming raw/rgbir data to f16.
-  // The transformation equation is x/(2^n). If set to 0, no transform is performed.
-  uint32 raw_rgbir_to_f16_n = 45;
-}
-
-message MultiShapeOpParams {
-  enum MultiShapeMode {
-    batch = 0;       // dynamic batch
-    resolution = 1;  // dynamic resolution, reserved for extension
-  }
-
-  MultiShapeMode mode = 1;        // operator mode
-  uint32 related_input_rank = 2;  // which model input the new operator is attached to
-
-
-  repeated uint32 batch_list = 11;  // batch_list values; the count must be between 2 and 8
-}
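The AIPP parameters deleted above are plain protobuf, so tooling normally drives them through the protoc-generated C++ classes. A minimal sketch of building a static AIPP configuration for the second model input; the generated header path and the surrounding function are illustrative assumptions, not GE code:

    #include <string>
    #include "proto/insert_op.pb.h"  // assumed protoc output path for insert_op.proto

    // Sketch: configure static AIPP on model input index 1 and serialize it.
    std::string BuildAippConfig() {
      domi::InsertNewOps ops;
      domi::AippOpParams *aipp = ops.add_aipp_op();
      aipp->set_related_input_rank(1);                        // AIPP on the second input
      aipp->set_input_format(domi::AippOpParams::RGB888_U8);  // static AIPP input format
      aipp->set_src_image_size_w(224);
      aipp->set_src_image_size_h(224);
      aipp->set_csc_switch(true);                             // enable colour-space conversion
      std::string bytes;
      ops.SerializeToString(&bytes);                          // protobuf wire format
      return bytes;
    }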
diff --git a/ge/executor/proto/om.proto b/ge/executor/proto/om.proto
deleted file mode 100644
index e15e5f80..00000000
--- a/ge/executor/proto/om.proto
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the Apache License Version 2.0. You may not use this file except in compliance with the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message PoolingOpParams { - int32 mode = 1; - int32 nan_opt = 2; - int32 pad_mode = 3; - bool global_pooling = 4; - - repeated uint32 window = 10; - repeated uint32 pad = 11; - repeated uint32 stride = 12; - 
bool ceil_mode = 13;
-  int32 data_mode = 14;
-
-  float alpha = 20;
-  float beta = 21;
-  repeated uint32 before_pad = 22;
-};
-
-message EltwiseOpParams {
-  int32 mode = 1;
-  repeated float coeff = 2;
-  float alpha = 3;
-  float beta = 4;
-  repeated WeightDef weight = 5;
-  bool relu_flag = 6;
-};
-
-message ActivationOpParams {
-  int32 mode = 1;
-  float coef = 2;
-  float alpha = 3;
-  float beta = 4;
-};
-
-message BatchNormOpParams {
-  int32 mode = 1;
-
-  float alpha = 2;
-  float beta = 3;
-  double epsilon = 4;                 // optional, [default = 1e-5]
-  bool use_global_stats = 5;          // optional, true by default (testing mode)
-  float moving_average_fraction = 6;  // optional, [default = .999]
-
-  WeightDef estimated_mean = 7;
-  WeightDef estimated_variance = 8;
-
-  WeightDef scale = 9;
-  WeightDef bias = 10;
-};
-
-message ScaleOpParams {
-  WeightDef scale = 1;
-  WeightDef bias = 2;
-};
-
-message ReshapeOpParams {
-  float alpha = 1;
-  float beta = 2;
-  ShapeDef shape = 3;
-  int32 axis = 4;
-  int32 num_axes = 5;
-  int32 format = 6;
-};
-
-message SoftmaxOpParams {
-  int32 algo = 1;
-  int32 mode = 2;
-  float alpha = 3;
-  float beta = 4;
-};
-
-message FullConnectionOpParams {
-  WeightDef filter = 1;
-  WeightDef bias = 2;
-  uint32 num_output = 3;
-  bool relu_flag = 12;
-};
-
-message FlattenOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 start_axis = 3;
-  int32 end_axis = 4;
-}
-
-message AddLimitedOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 axis = 3;
-  bool broadcast = 4;
-
-  repeated WeightDef weight = 10;
-};
-
-message MulLimitedOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 axis = 3;
-  bool broadcast = 4;
-
-  repeated WeightDef weight = 10;
-};
-
-message AddOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message MulOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message SubOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message BiasAddOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  WeightDef bias = 10;
-};
-
-message MatMulOpParams {
-  float alpha = 1;
-  float beta = 2;
-  bool transposeX = 3;
-  bool transposeW = 4;
-
-  WeightDef filter = 10;
-  WeightDef bias = 12;
-};
-
-message RsqrtOpParams {
-  float alpha = 1;
-  float beta = 2;
-};
-
-
-message WeightDef {
-  int32 format = 1;
-  int32 data_type = 2;
-  ShapeDef shape = 3;
-  bytes data = 4;
-  int64 data_offset = 5;
-  uint32 cmps_size = 6;
-  bytes cmps_tab = 7;
-  int64 cmps_tab_offset = 10;
-  CompressInfo cmps_info = 8;
-  AllOffsetQuantizeInfo alloffset_quantize_info = 11;
-}
-
-message ShapeDef {
-  repeated int64 dim = 1;
-}
-
-enum DeviceType {
-  NPU = 0;  // By default, NPU is used.
-  CPU = 1;  // CPU
-}
-
-message AllOffsetQuantizeInfo {
-  float scale = 1;
-  int32 offset = 2;
-}
-
-message TensorDescriptor {
-  int32 format = 1;
-  int32 data_type = 2;
-  repeated int64 dim = 3;
-  uint32 size = 4;
-  bool reuse_input = 5;
-  bool output_tensor = 7;
-  DeviceType device_type = 8;
-  bool input_tensor = 9;
-  uint32 real_dim_cnt = 10;
-  uint32 reuse_input_index = 11;
-  AllOffsetQuantizeInfo alloffset_quantize_info = 12;
-}
-
-message CompressInfo {
-  int32 blockRow = 1;      // block row
-  int32 blockCol = 2;      // block col
-  int32 fractalK = 3;      // fractal K
-  int32 fractalN = 4;      // fractal N
-  int32 lastFractalK = 5;  // K of last fractal
-  int32 lastFractalN = 6;  // N of last fractal
-  int32 cubeSize = 7;      // cube's length
-  int32 loadDir = 8;       // data load direction: 0 = column load, 1 = row load
-}
-
-message AttrDef {
-  message ListValue {
-    repeated string s = 2;                  // "list(string)"
-    repeated int64 i = 3 [packed = true];   // "list(int)"
-    repeated float f = 4 [packed = true];   // "list(float)"
-    repeated bool b = 5 [packed = true];    // "list(bool)"
-    repeated uint32 u = 6 [packed = true];  // "list(uint)"
-    repeated bytes bt = 7;
-  }
-
-  oneof value {
-    string s = 2;       // "string"
-    int64 i = 3;        // "int"
-    float f = 4;        // "float"
-    bool b = 5;         // "bool"
-    uint32 u = 6;       // "uint32"
-    bytes bt = 7;
-    ListValue list = 1; // any "list(...)"
-    NamedAttrs func = 10;
-  }
-}
-
-// A list of attr names and their values. The whole list is attached
-// with a string name. E.g., MatMul[T=float].
-message NamedAttrs {
-  string name = 1;
-  map<string, AttrDef> attr = 2;
-}
-
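Since om.proto carries operator attributes as a map of AttrDef with a oneof payload, reading a model back is mostly a matter of switching on value_case(). A small sketch under the same assumptions (generated header path and function are illustrative only):

    #include <string>
    #include "proto/om.pb.h"  // assumed protoc output path for om.proto

    // Sketch: parse a serialized offline model and read one float attr per op.
    bool DumpAlpha(const std::string &bytes) {
      domi::ModelDef model;
      if (!model.ParseFromString(bytes)) {
        return false;  // not a valid ModelDef blob
      }
      for (const domi::OpDef &op : model.op()) {
        auto it = op.attr().find("alpha");
        if (it != op.attr().end() && it->second.value_case() == domi::AttrDef::kF) {
          float alpha = it->second.f();  // the "f" arm of the value oneof
          (void)alpha;
        }
      }
      return true;
    }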
diff --git a/ge/executor/proto/op_mapping.proto b/ge/executor/proto/op_mapping.proto
deleted file mode 100644
index d626eb49..00000000
--- a/ge/executor/proto/op_mapping.proto
+++ /dev/null
@@ -1,75 +0,0 @@
-syntax = "proto3";
-package toolkit.aicpu.dump;
-
-message Shape {
-  repeated uint64 dim = 1;
-}
-
-message Output {
-  int32 data_type = 1;
-  int32 format = 2;
-  Shape shape = 3;
-  uint64 address = 4;
-  string original_name = 5;
-  int32 original_output_index = 6;
-  int32 original_output_data_type = 7;
-  int32 original_output_format = 8;
-  uint64 size = 9;
-  Shape origin_shape = 10;
-}
-
-message Input {
-  int32 data_type = 1;
-  int32 format = 2;
-  Shape shape = 3;
-  uint64 address = 4;
-  uint64 size = 5;
-  Shape origin_shape = 6;
-}
-
-enum BufferType {
-  L1 = 0;
-}
-
-message OpBuffer {
-  BufferType buffer_type = 1;
-  uint64 address = 2;
-  uint64 size = 3;
-}
-
-message Op {
-  string op_name = 1;
-  string op_type = 2;
-}
-
-message Task {
-  uint32 task_id = 1;
-  uint32 stream_id = 2;
-  Op op = 3;
-  repeated Output output = 4;
-  bool end_graph = 5;
-  repeated Input input = 6;
-  repeated OpBuffer buffer = 7;
-}
-
-message OpMappingInfo {
-  string dump_path = 1;
-  oneof model_name_param {
-    string model_name = 2;
-  }
-  oneof model_id_param {
-    uint32 model_id = 3;
-  }
-  oneof step_id {
-    uint64 step_id_addr = 4;
-  }
-  oneof iterations_per_loop {
-    uint64 iterations_per_loop_addr = 5;
-  }
-  oneof loop_cond {
-    uint64 loop_cond_addr = 6;
-  }
-  uint32 flag = 7;  // 0x01 load, 0x00 unload
-  repeated Task task = 8;
-  string dump_step = 9;
-}
\ No newline at end of file
diff --git a/ge/executor/proto/task.proto b/ge/executor/proto/task.proto
deleted file mode 100644
index 0da5631e..00000000
--- a/ge/executor/proto/task.proto
+++ /dev/null
@@ -1,179 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 
op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/ge_local_engine/proto/task.proto b/ge/ge_local_engine/proto/task.proto deleted file mode 100644 index 0da5631e..00000000 --- a/ge/ge_local_engine/proto/task.proto +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes 
kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/offline/proto/ge_ir.proto b/ge/offline/proto/ge_ir.proto deleted file mode 100644 index c0ef3071..00000000 --- a/ge/offline/proto/ge_ir.proto +++ /dev/null @@ -1,193 +0,0 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set. - DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ - DT_VARIANT = 26; // variant type - DT_BF16 = 27; // bf16 type - DT_INT4 = 28; // int4 type -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; // Used to support attr 
nesting
-        TensorDescriptor td = 11;        // GeTensorDesc type
-        TensorDef t = 12;                // GeTensor type
-        GraphDef g = 13;                 // Graph type
-        ListListInt list_list_int = 14;  // List List Int type
-        int64 dt = 15;                   // ge::DataType
-    }
-}
-
-// A list of attr names and their values. The whole list is attached
-// with a string name. E.g., MatMul[T=float].
-message NamedAttrs
-{
-    string name = 1;
-    map<string, AttrDef> attr = 2;
-}
-
-// Shape / dimension description, using row-major order
-message ShapeDef
-{
-    repeated int64 dim = 1;  // Size of each dimension
-}
-
-// Multidimensional data description
-message TensorDescriptor
-{
-    string name = 1;     // Optional parameter, tensor name
-
-    DataType dtype = 2;  // tensor datatype
-    ShapeDef shape = 3;  // Shape / dimension
-    string layout = 4;   // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"
-
-    bool has_out_attr = 9;
-    int64 size = 10;
-    int64 weight_size = 11;
-    bool reuse_input = 12;
-    bool output_tensor = 13;
-    string device_type = 14;
-    bool input_tensor = 15;
-    int64 real_dim_cnt = 16;
-    int64 reuse_input_index = 17;
-    int64 data_offset = 18;
-    int64 cmps_size = 19;
-    string cmps_tab = 20;
-    int64 cmps_tab_offset = 21;
-
-    map<string, AttrDef> attr = 5;  // Set of extra parameter fields
-}
-
-// GeTensor definition
-message TensorDef
-{
-    TensorDescriptor desc = 1;  // Tensor description
-    bytes data = 2;             // Tensor data
-}
-
-
-// Operator description
-message OpDef
-{
-    string name = 1;  // name
-    string type = 2;  // type
-
-    repeated string input = 5;  // input original op name + outgoing index. op_name:index
-
-    map<string, AttrDef> attr = 10;  // Set of operator parameter fields
-
-    bool has_out_attr = 20;
-    int64 id = 21;
-    int64 stream_id = 22;
-    repeated string input_name = 23;
-    repeated string src_name = 24;
-    repeated int64 src_index = 25;
-    repeated string dst_name = 26;
-    repeated int64 dst_index = 27;
-    repeated int64 input_i = 28;
-    repeated int64 output_i = 29;
-    repeated int64 workspace = 30;
-    repeated int64 workspace_bytes = 31;
-    repeated bool is_input_const = 32;
-    repeated TensorDescriptor input_desc = 33;
-    repeated TensorDescriptor output_desc = 34;
-    repeated string subgraph_name = 35;
-}
-
-// Graph definition
-message GraphDef
-{
-    string name = 1;  // name
-
-    repeated string input = 4;   // Graph input
-    repeated string output = 5;  // Graph output
-
-    repeated OpDef op = 6;  // List of operators
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
-// model definition
-message ModelDef
-{
-    string name = 1;            // name
-    uint32 version = 2;         // IR proto version
-    string custom_version = 3;  // User model version number, passed in by user
-
-    repeated GraphDef graph = 7;  // Graph definition; graph[0] is the main graph of the model
-
-    map<string, AttrDef> attr = 11;  // Extended field
-}
-
diff --git a/ge/offline/proto/insert_op.proto b/ge/offline/proto/insert_op.proto
deleted file mode 100644
index 7d708865..00000000
--- a/ge/offline/proto/insert_op.proto
+++ /dev/null
@@ -1,140 +0,0 @@
-syntax = "proto3";
-
-package domi;
-
-message InsertNewOps {
-  repeated AippOpParams aipp_op = 1;
-  repeated MultiShapeOpParams multi_shape_op = 2;
-}
-
-message AippOpParams {
-  enum InputFormat {
-    UNDEFINED = 0;
-    YUV420SP_U8 = 1;
-    XRGB8888_U8 = 2;
-    RGB888_U8 = 3;
-    YUV400_U8 = 4;
-    NC1HWC0DI_FP16 = 5;
-    NC1HWC0DI_S8 = 6;
-    ARGB8888_U8 = 7;
-    YUYV_U8 = 8;
-    YUV422SP_U8 = 9;
-    AYUV444_U8 = 10;
-    RAW10 = 11;
-    RAW12 = 12;
-    RAW16 = 13;
-    RAW24 = 14;
-    RGB16 = 15;
-    RGB20 = 16;
-    RGB24 = 17;
-    RGB8_IR = 18;
-    RGB16_IR = 19;
-    RGB24_IR = 20;
-  }
-
-  enum AippMode {
-    undefined = 0;
-    static = 1;
-    dynamic = 2;
-  }
-
-  // AIPP mode: distinguishes static AIPP from dynamic AIPP
-  AippMode aipp_mode = 1;
-
-  // related_input_rank is required; integer; valid range is >= 0 and
-  // <= the number of input Data operators; default is 0. It selects which
-  // model input is processed by AIPP: e.g. if the model has two inputs and
-  // AIPP is needed on the second one, set related_input_rank to 1.
-  uint32 related_input_rank = 2;
-
-  // related_input_name is optional and is the top name of the data node which inserts aipp
-  string related_input_name = 6;
-
-  // input_edge_idx is optional; integer; valid range is >= 0.
-  // It allows different AIPP processing for different outputs of the Data
-  // operator. If unset, AIPP is applied to all output edges of the model
-  // input selected by related_input_rank. Each configured value must be
-  // <= the number of output edges of the Data operator.
-  repeated uint32 input_edge_idx = 3;
-
-  // [Begin] dynamic AIPP parameters; ignored when static AIPP is configured
-  uint32 max_src_image_size = 4;
-
-  // Whether rotation is supported. Disabled by default; enabling rotation
-  // costs extra memory and performance.
-  bool support_rotation = 5;
-
-  // [End] dynamic AIPP parameters
-
-
-  // [Begin] static AIPP parameters; ignored when dynamic AIPP is configured
-  InputFormat input_format = 51;
-  bool csc_switch = 52;
-  float cpadding_value = 53;
-  bool rbuv_swap_switch = 54;
-  bool ax_swap_switch = 55;
-  bool single_line_mode = 56;
-
-  int32 src_image_size_w = 57;
-  int32 src_image_size_h = 58;
-
-  bool crop = 59;
-  int32 load_start_pos_w = 60;
-  int32 load_start_pos_h = 61;
-  int32 crop_size_w = 62;
-  int32 crop_size_h = 63;
-
-  bool resize = 64;
-  int32 resize_output_w = 65;
-  int32 resize_output_h = 66;
-
-  bool padding = 67;
-  int32 left_padding_size = 68;
-  int32 right_padding_size = 69;
-  int32 top_padding_size = 70;
-  int32 bottom_padding_size = 71;
-  float padding_value = 72;
-
-  int32 mean_chn_0 = 10;
-  int32 mean_chn_1 = 11;
-  int32 mean_chn_2 = 12;
-  int32 mean_chn_3 = 19;
-  float min_chn_0 = 13;
-  float min_chn_1 = 14;
-  float min_chn_2 = 15;
-  float min_chn_3 = 20;
-  repeated float var_reci_chn_0 = 16;
-  repeated float var_reci_chn_1 = 17;
-  repeated float var_reci_chn_2 = 18;
-  repeated float var_reci_chn_3 = 21;
-
-  repeated int32 matrix_r0c0 = 30;
-  repeated int32 matrix_r0c1 = 31;
-  repeated int32 matrix_r0c2 = 32;
-  repeated int32 matrix_r1c0 = 33;
-  repeated int32 matrix_r1c1 = 34;
-  repeated int32 matrix_r1c2 = 35;
-  repeated int32 matrix_r2c0 = 36;
-  repeated int32 matrix_r2c1 = 37;
-  repeated int32 matrix_r2c2 = 38;
-  repeated int32 output_bias_0 = 39;
-  repeated int32 output_bias_1 = 40;
-  repeated int32 output_bias_2 = 41;
-  repeated int32 input_bias_0 = 42;
-  repeated int32 input_bias_1 = 43;
-  repeated int32 input_bias_2 = 44;
-
-  // [End] static AIPP parameters
-
-  // The exponent n used when transforming raw/rgbir data to f16.
-  // The transformation equation is x/(2^n). If set to 0, no transform is performed.
-  uint32 raw_rgbir_to_f16_n = 45;
-}
-
-message MultiShapeOpParams {
-  enum MultiShapeMode {
-    batch = 0;       // dynamic batch
-    resolution = 1;  // dynamic resolution, reserved for extension
-  }
-
-  MultiShapeMode mode = 1;        // operator mode
-  uint32 related_input_rank = 2;  // which model input the new operator is attached to
-
-
-  repeated uint32 batch_list = 11;  // batch_list values; the count must be between 2 and 8
-}
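These insert_op.proto messages are often authored as protobuf text format rather than wire format, in which case parsing only needs TextFormat. A sketch; the example string and function are illustrative, while the field names come from the message above:

    #include <string>
    #include "google/protobuf/text_format.h"
    #include "proto/insert_op.pb.h"  // assumed protoc output path for insert_op.proto

    // Sketch: parse a human-readable AIPP description into InsertNewOps.
    bool ParseAippText(domi::InsertNewOps *ops) {
      const std::string text =
          "aipp_op { aipp_mode: static related_input_rank: 0 csc_switch: true }";
      return google::protobuf::TextFormat::ParseFromString(text, ops);
    }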
diff --git a/ge/offline/proto/om.proto b/ge/offline/proto/om.proto
deleted file mode 100644
index e15e5f80..00000000
--- a/ge/offline/proto/om.proto
+++ /dev/null
@@ -1,396 +0,0 @@
-/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the Apache License Version 2.0. You may not use this file except in compliance with the License.
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message 
PoolingOpParams {
-  int32 mode = 1;
-  int32 nan_opt = 2;
-  int32 pad_mode = 3;
-  bool global_pooling = 4;
-
-  repeated uint32 window = 10;
-  repeated uint32 pad = 11;
-  repeated uint32 stride = 12;
-  bool ceil_mode = 13;
-  int32 data_mode = 14;
-
-  float alpha = 20;
-  float beta = 21;
-  repeated uint32 before_pad = 22;
-};
-
-message EltwiseOpParams {
-  int32 mode = 1;
-  repeated float coeff = 2;
-  float alpha = 3;
-  float beta = 4;
-  repeated WeightDef weight = 5;
-  bool relu_flag = 6;
-};
-
-message ActivationOpParams {
-  int32 mode = 1;
-  float coef = 2;
-  float alpha = 3;
-  float beta = 4;
-};
-
-message BatchNormOpParams {
-  int32 mode = 1;
-
-  float alpha = 2;
-  float beta = 3;
-  double epsilon = 4;                 // optional, [default = 1e-5]
-  bool use_global_stats = 5;          // optional, true by default (testing mode)
-  float moving_average_fraction = 6;  // optional, [default = .999]
-
-  WeightDef estimated_mean = 7;
-  WeightDef estimated_variance = 8;
-
-  WeightDef scale = 9;
-  WeightDef bias = 10;
-};
-
-message ScaleOpParams {
-  WeightDef scale = 1;
-  WeightDef bias = 2;
-};
-
-message ReshapeOpParams {
-  float alpha = 1;
-  float beta = 2;
-  ShapeDef shape = 3;
-  int32 axis = 4;
-  int32 num_axes = 5;
-  int32 format = 6;
-};
-
-message SoftmaxOpParams {
-  int32 algo = 1;
-  int32 mode = 2;
-  float alpha = 3;
-  float beta = 4;
-};
-
-message FullConnectionOpParams {
-  WeightDef filter = 1;
-  WeightDef bias = 2;
-  uint32 num_output = 3;
-  bool relu_flag = 12;
-};
-
-message FlattenOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 start_axis = 3;
-  int32 end_axis = 4;
-}
-
-message AddLimitedOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 axis = 3;
-  bool broadcast = 4;
-
-  repeated WeightDef weight = 10;
-};
-
-message MulLimitedOpParams {
-  float alpha = 1;
-  float beta = 2;
-  int32 axis = 3;
-  bool broadcast = 4;
-
-  repeated WeightDef weight = 10;
-};
-
-message AddOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message MulOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message SubOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  repeated WeightDef weight = 10;
-};
-
-message BiasAddOpParams {
-  float alpha = 1;
-  float beta = 2;
-
-  WeightDef bias = 10;
-};
-
-message MatMulOpParams {
-  float alpha = 1;
-  float beta = 2;
-  bool transposeX = 3;
-  bool transposeW = 4;
-
-  WeightDef filter = 10;
-  WeightDef bias = 12;
-};
-
-message RsqrtOpParams {
-  float alpha = 1;
-  float beta = 2;
-};
-
-
-message WeightDef {
-  int32 format = 1;
-  int32 data_type = 2;
-  ShapeDef shape = 3;
-  bytes data = 4;
-  int64 data_offset = 5;
-  uint32 cmps_size = 6;
-  bytes cmps_tab = 7;
-  int64 cmps_tab_offset = 10;
-  CompressInfo cmps_info = 8;
-  AllOffsetQuantizeInfo alloffset_quantize_info = 11;
-}
-
-message ShapeDef {
-  repeated int64 dim = 1;
-}
-
-enum DeviceType {
-  NPU = 0;  // By default, NPU is used.
-  CPU = 1;  // CPU
-}
-
-message AllOffsetQuantizeInfo {
-  float scale = 1;
-  int32 offset = 2;
-}
-
-message TensorDescriptor {
-  int32 format = 1;
-  int32 data_type = 2;
-  repeated int64 dim = 3;
-  uint32 size = 4;
-  bool reuse_input = 5;
-  bool output_tensor = 7;
-  DeviceType device_type = 8;
-  bool input_tensor = 9;
-  uint32 real_dim_cnt = 10;
-  uint32 reuse_input_index = 11;
-  AllOffsetQuantizeInfo alloffset_quantize_info = 12;
-}
-
-message CompressInfo {
-  int32 blockRow = 1;      // block row
-  int32 blockCol = 2;      // block col
-  int32 fractalK = 3;      // fractal K
-  int32 fractalN = 4;      // fractal N
-  int32 lastFractalK = 5;  // K of last fractal
-  int32 lastFractalN = 6;  // N of last fractal
-  int32 cubeSize = 7;      // cube's length
-  int32 loadDir = 8;       // data load direction: 0 = column load, 1 = row load
-}
-
-message AttrDef {
-  message ListValue {
-    repeated string s = 2;                  // "list(string)"
-    repeated int64 i = 3 [packed = true];   // "list(int)"
-    repeated float f = 4 [packed = true];   // "list(float)"
-    repeated bool b = 5 [packed = true];    // "list(bool)"
-    repeated uint32 u = 6 [packed = true];  // "list(uint)"
-    repeated bytes bt = 7;
-  }
-
-  oneof value {
-    string s = 2;       // "string"
-    int64 i = 3;        // "int"
-    float f = 4;        // "float"
-    bool b = 5;         // "bool"
-    uint32 u = 6;       // "uint32"
-    bytes bt = 7;
-    ListValue list = 1; // any "list(...)"
-    NamedAttrs func = 10;
-  }
-}
-
-// A list of attr names and their values. The whole list is attached
-// with a string name. E.g., MatMul[T=float].
-message NamedAttrs {
-  string name = 1;
-  map<string, AttrDef> attr = 2;
-}
-
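Writing attributes into these structures is the mirror image of reading them: set one arm of the value oneof and store the AttrDef in the op's attr map. A short sketch under the same assumptions as before:

    #include "proto/om.pb.h"  // assumed protoc output path for om.proto

    // Sketch: attach a float attribute; assigning a oneof member clears any other arm.
    void SetAlpha(domi::OpDef *op, float alpha) {
      domi::AttrDef attr;
      attr.set_f(alpha);                      // selects the "f" arm of the value oneof
      (*op->mutable_attr())["alpha"] = attr;  // map<string, AttrDef> attr = 30
    }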
- CPU = 1; // CPU -} - -message AllOffsetQuantizeInfo { - float scale = 1; - int32 offset = 2; -} - -message TensorDescriptor { - int32 format = 1; - int32 data_type = 2; - repeated int64 dim = 3; - uint32 size = 4; - bool reuse_input = 5; - bool output_tensor = 7; - DeviceType device_type = 8; - bool input_tensor = 9; - uint32 real_dim_cnt = 10; - uint32 reuse_input_index = 11; - AllOffsetQuantizeInfo alloffset_quantize_info = 12; -} - -message CompressInfo { - int32 blockRow = 1; // block row - int32 blockCol = 2; // block col - int32 fractalK = 3; // fractal K - int32 fractalN = 4; // fractal N - int32 lastFractalK = 5; // K of last fractal - int32 lastFractalN = 6; // N of last fractal - int32 cubeSize = 7; // cube's length - int32 loadDir = 8; // data load directtiono 0:col load 1:row load -} - -message AttrDef { - message ListValue { - repeated string s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated uint32 u = 6 [packed = true]; // "list(uint)" - repeated bytes bt = 7; - } - - oneof value { - string s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - uint32 u = 6; // "uint32" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. -message NamedAttrs { - string name = 1; - map attr = 2; -} - diff --git a/ge/offline/proto/task.proto b/ge/offline/proto/task.proto deleted file mode 100644 index 0da5631e..00000000 --- a/ge/offline/proto/task.proto +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - 
uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/proto/caffe/caffe.proto b/ge/proto/caffe/caffe.proto deleted file mode 100644 index 20615fed..00000000 --- a/ge/proto/caffe/caffe.proto +++ /dev/null @@ -1,1829 +0,0 @@ -/** - * This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe - * - * This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto2"; - -package domi.caffe; - -// Specifies the shape (dimensions) of a Blob. -message BlobShape { - repeated int64 dim = 1 [packed = true]; -} - -message BlobProto { - optional BlobShape shape = 7; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; - repeated double double_data = 8 [packed = true]; - repeated double double_diff = 9 [packed = true]; - optional bytes int8_data = 10; - repeated int32 int32_data = 11 [packed = true]; - repeated uint64 uint64_data = 12 [packed = true]; - // 4D dimensions -- deprecated. Use "shape" instead. - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; -} - -// The BlobProtoVector is simply a way to pass multiple blobproto instances -// around. -message BlobProtoVector { - repeated BlobProto blobs = 1; -} - -message Datum { - optional int32 channels = 1; - optional int32 height = 2; - optional int32 width = 3; - // the actual image data, in bytes - optional bytes data = 4; - optional int32 label = 5; - // Optionally, the datum could also hold float data. - repeated float float_data = 6; - // If true data contains an encoded image that need to be decoded - optional bool encoded = 7 [default = false]; -} - -message FillerParameter { - // The filler type. - optional string type = 1 [default = 'constant']; - optional float value = 2 [default = 0]; // the value in constant filler - optional float min = 3 [default = 0]; // the min value in uniform filler - optional float max = 4 [default = 1]; // the max value in uniform filler - optional float mean = 5 [default = 0]; // the mean value in Gaussian filler - optional float std = 6 [default = 1]; // the std value in Gaussian filler - // The expected number of non-zero output weights for a given input in - // Gaussian filler -- the default -1 means don't perform sparsification. - optional int32 sparse = 7 [default = -1]; - // Normalize the filler variance by fan_in, fan_out, or their average. - // Applies to 'xavier' and 'msra' fillers. - enum VarianceNorm { - FAN_IN = 0; - FAN_OUT = 1; - AVERAGE = 2; - } - optional VarianceNorm variance_norm = 8 [default = FAN_IN]; -} - -message NetParameter { - optional string name = 1; // consider giving the network a name - // DEPRECATED. See InputParameter. The input blobs to the network. - repeated string input = 3; - // DEPRECATED. See InputParameter. The shape of the input blobs. - repeated BlobShape input_shape = 8; - - // 4D input dimensions -- deprecated. Use "input_shape" instead. - // If specified, for each input blob there should be four - // values specifying the num, channels, height and width of the input blob. - // Thus, there should be a total of (4 * #input) numbers. 
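For reviewers tracking what this deletion removes: BlobShape and BlobProto above are the basic tensor containers of the Caffe schema. Below is a minimal sketch of how such a blob would be populated through the protoc-generated C++ API; the header name caffe.pb.h and the build wiring are assumptions for illustration, not part of this patch.

// Sketch only: assumes protoc has generated caffe.pb.h from the caffe.proto
// removed by this patch (package domi.caffe), with the usual accessors.
#include "caffe.pb.h"  // hypothetical generated header

domi::caffe::BlobProto MakeBlob() {
  domi::caffe::BlobProto blob;
  // Preferred N-D form: optional BlobShape shape = 7.
  domi::caffe::BlobShape* shape = blob.mutable_shape();
  shape->add_dim(1);
  shape->add_dim(3);
  shape->add_dim(224);
  shape->add_dim(224);
  // Payload: repeated float data = 5 [packed = true]. The 4D num/channels/
  // height/width fields are deprecated in favour of "shape".
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i) blob.add_data(0.0f);
  return blob;
}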
- repeated int32 input_dim = 4; - - // Whether the network will force every layer to carry out backward operation. - // If set False, then whether to carry out backward is determined - // automatically according to the net structure and learning rates. - optional bool force_backward = 5 [default = false]; - // The current "state" of the network, including the phase, level, and stage. - // Some layers may be included/excluded depending on this state and the states - // specified in the layers' include and exclude fields. - optional NetState state = 6; - - // Print debugging information about results while running Net::Forward, - // Net::Backward, and Net::Update. - optional bool debug_info = 7 [default = false]; - - // The layers that make up the net. Each of their configurations, including - // connectivity and behavior, is specified as a LayerParameter. - repeated LayerParameter layer = 100; // ID 100 so layers are printed last. - - // DEPRECATED: use 'layer' instead. - repeated V1LayerParameter layers = 2; -} - -// NOTE -// Update the next available ID when you add a new SolverParameter field. -// -// SolverParameter next available ID: 42 (last added: layer_wise_reduce) -message SolverParameter { - ////////////////////////////////////////////////////////////////////////////// - // Specifying the train and test networks - // - // Exactly one train net must be specified using one of the following fields: - // train_net_param, train_net, net_param, net - // One or more test nets may be specified using any of the following fields: - // test_net_param, test_net, net_param, net - // If more than one test net field is specified (e.g., both net and - // test_net are specified), they will be evaluated in the field order given - // above: (1) test_net_param, (2) test_net, (3) net_param/net. - // A test_iter must be specified for each test_net. - // A test_level and/or a test_stage may also be specified for each test_net. - ////////////////////////////////////////////////////////////////////////////// - - // Proto filename for the train net, possibly combined with one or more - // test nets. - optional string net = 24; - // Inline train net param, possibly combined with one or more test nets. - optional NetParameter net_param = 25; - - optional string train_net = 1; // Proto filename for the train net. - repeated string test_net = 2; // Proto filenames for the test nets. - optional NetParameter train_net_param = 21; // Inline train net params. - repeated NetParameter test_net_param = 22; // Inline test net params. - - // The states for the train/test nets. Must be unspecified or - // specified once per net. - // - // By default, all states will have solver = true; - // train_state will have phase = TRAIN, - // and all test_state's will have phase = TEST. - // Other defaults are set according to the NetState defaults. - optional NetState train_state = 26; - repeated NetState test_state = 27; - - // The number of iterations for each test net. - repeated int32 test_iter = 3; - - // The number of iterations between two testing phases. - optional int32 test_interval = 4 [default = 0]; - optional bool test_compute_loss = 19 [default = false]; - // If true, run an initial test pass before the first iteration, - // ensuring memory availability and printing the starting value of the loss. - optional bool test_initialization = 32 [default = true]; - optional float base_lr = 5; // The base learning rate - // the number of iterations between displaying info. If display = 0, no info - // will be displayed. 
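The NetParameter message above keeps both the current repeated `layer` field (ID 100) and the deprecated V1 `layers` field. A minimal sketch of the fallback a loader would need, under the same generated-header assumption as above:

#include <iostream>
#include "caffe.pb.h"  // hypothetical generated header, as above

void DumpLayers(const domi::caffe::NetParameter& net) {
  if (net.layer_size() > 0) {
    // repeated LayerParameter layer = 100 (the current format).
    for (const auto& l : net.layer()) {
      std::cout << l.name() << " : " << l.type() << "\n";
    }
  } else {
    // DEPRECATED: repeated V1LayerParameter layers = 2 (legacy models).
    for (const auto& l : net.layers()) {
      std::cout << l.name() << " (V1 layer)\n";
    }
  }
}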
-  optional int32 display = 6;
-  // Display the loss averaged over the last average_loss iterations
-  optional int32 average_loss = 33 [default = 1];
-  optional int32 max_iter = 7; // the maximum number of iterations
-  // accumulate gradients over `iter_size` x `batch_size` instances
-  optional int32 iter_size = 36 [default = 1];
-
-  // The learning rate decay policy. The currently implemented learning rate
-  // policies are as follows:
-  //    - fixed: always return base_lr.
-  //    - step: return base_lr * gamma ^ (floor(iter / step))
-  //    - exp: return base_lr * gamma ^ iter
-  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
-  //    - multistep: similar to step but it allows non uniform steps defined by
-  //      stepvalue
-  //    - poly: the effective learning rate follows a polynomial decay, to be
-  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
-  //    - sigmoid: the effective learning rate follows a sigmod decay
-  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
-  //
-  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
-  // in the solver parameter protocol buffer, and iter is the current iteration.
-  optional string lr_policy = 8;
-  optional float gamma = 9; // The parameter to compute the learning rate.
-  optional float power = 10; // The parameter to compute the learning rate.
-  optional float momentum = 11; // The momentum value.
-  optional float weight_decay = 12; // The weight decay.
-  // regularization types supported: L1 and L2
-  // controlled by weight_decay
-  optional string regularization_type = 29 [default = "L2"];
-  // the stepsize for learning rate policy "step"
-  optional int32 stepsize = 13;
-  // the stepsize for learning rate policy "multistep"
-  repeated int32 stepvalue = 34;
-
-  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
-  // whenever their actual L2 norm is larger.
-  optional float clip_gradients = 35 [default = -1];
-
-  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
-  optional string snapshot_prefix = 15; // The prefix for the snapshot.
-  // whether to snapshot diff in the results or not. Snapshotting diff will help
-  // debugging but the final protocol buffer size will be much larger.
-  optional bool snapshot_diff = 16 [default = false];
-  enum SnapshotFormat {
-    HDF5 = 0;
-    BINARYPROTO = 1;
-  }
-  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
-  // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default.
-  enum SolverMode {
-    CPU = 0;
-    GPU = 1;
-  }
-  optional SolverMode solver_mode = 17 [default = GPU];
-  // the device_id will that be used in GPU mode. Use device_id = 0 in default.
-  optional int32 device_id = 18 [default = 0];
-  // If non-negative, the seed with which the Solver will initialize the Caffe
-  // random number generator -- useful for reproducible results. Otherwise,
-  // (and by default) initialize using a seed derived from the system clock.
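The lr_policy comment above is effectively a specification; here it is transcribed directly into plain C++ (no protobuf dependency, "multistep" omitted because it needs the stepvalue list, stepsize assumed positive):

#include <cmath>
#include <string>

// Sketch only: each branch mirrors the corresponding formula quoted above.
double GetLearningRate(const std::string& policy, double base_lr, double gamma,
                       double power, int iter, int stepsize, int max_iter) {
  if (policy == "fixed")   return base_lr;
  if (policy == "step")    return base_lr * std::pow(gamma, iter / stepsize);  // int division == floor
  if (policy == "exp")     return base_lr * std::pow(gamma, iter);
  if (policy == "inv")     return base_lr * std::pow(1.0 + gamma * iter, -power);
  if (policy == "poly")    return base_lr * std::pow(1.0 - static_cast<double>(iter) / max_iter, power);
  if (policy == "sigmoid") return base_lr * (1.0 / (1.0 + std::exp(-gamma * (iter - stepsize))));
  return base_lr;  // unknown policy: fall back to base_lr
}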
- optional int64 random_seed = 20 [default = -1]; - - // type of the solver - optional string type = 40 [default = "SGD"]; - - // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam - optional float delta = 31 [default = 1e-8]; - // parameters for the Adam solver - optional float momentum2 = 39 [default = 0.999]; - - // RMSProp decay value - // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) - optional float rms_decay = 38 [default = 0.99]; - - // If true, print information about the state of the net that may help with - // debugging learning problems. - optional bool debug_info = 23 [default = false]; - - // If false, don't save a snapshot after training finishes. - optional bool snapshot_after_train = 28 [default = true]; - - // DEPRECATED: old solver enum types, use string instead - enum SolverType { - SGD = 0; - NESTEROV = 1; - ADAGRAD = 2; - RMSPROP = 3; - ADADELTA = 4; - ADAM = 5; - } - // DEPRECATED: use type instead of solver_type - optional SolverType solver_type = 30 [default = SGD]; - - // Overlap compute and communication for data parallel training - optional bool layer_wise_reduce = 41 [default = true]; -} - -// A message that stores the solver snapshots -message SolverState { - optional int32 iter = 1; // The current iteration - optional string learned_net = 2; // The file that stores the learned net. - repeated BlobProto history = 3; // The history for sgd solvers - optional int32 current_step = 4 [default = 0]; // The current step for learning rate -} - -enum Phase { - TRAIN = 0; - TEST = 1; -} - -message NetState { - optional Phase phase = 1 [default = TEST]; - optional int32 level = 2 [default = 0]; - repeated string stage = 3; -} - -message NetStateRule { - // Set phase to require the NetState have a particular phase (TRAIN or TEST) - // to meet this rule. - optional Phase phase = 1; - - // Set the minimum and/or maximum levels in which the layer should be used. - // Leave undefined to meet the rule regardless of level. - optional int32 min_level = 2; - optional int32 max_level = 3; - - // Customizable sets of stages to include or exclude. - // The net must have ALL of the specified stages and NONE of the specified - // "not_stage"s to meet the rule. - // (Use multiple NetStateRules to specify conjunctions of stages.) - repeated string stage = 4; - repeated string not_stage = 5; -} - -// Specifies training parameters (multipliers on global learning constants, -// and the name and other settings used for weight sharing). -message ParamSpec { - // The names of the parameter blobs -- useful for sharing parameters among - // layers, but never required otherwise. To share a parameter between two - // layers, give it a (non-empty) name. - optional string name = 1; - - // Whether to require shared weights to have the same shape, or just the same - // count -- defaults to STRICT if unspecified. - optional DimCheckMode share_mode = 2; - enum DimCheckMode { - // STRICT (default) requires that num, channels, height, width each match. - STRICT = 0; - // PERMISSIVE requires only the count (num*channels*height*width) to match. - PERMISSIVE = 1; - } - - // The multiplier on the global learning rate for this parameter. - optional float lr_mult = 3 [default = 1.0]; - - // The multiplier on the global weight decay for this parameter. - optional float decay_mult = 4 [default = 1.0]; -} - -// NOTE -// Update the next available ID when you add a new LayerParameter field. 
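NetStateRule's matching contract is spelled out in the comments above: the phase must match when set, the level must sit in [min_level, max_level], and the state needs ALL `stage` entries and NONE of the `not_stage` entries. A sketch of that predicate, again against the hypothetical generated header:

#include <algorithm>
#include <string>
#include "caffe.pb.h"  // hypothetical generated header

bool StateMeetsRule(const domi::caffe::NetState& state,
                    const domi::caffe::NetStateRule& rule) {
  if (rule.has_phase() && rule.phase() != state.phase()) return false;
  if (rule.has_min_level() && state.level() < rule.min_level()) return false;
  if (rule.has_max_level() && state.level() > rule.max_level()) return false;
  auto has_stage = [&state](const std::string& s) {
    return std::find(state.stage().begin(), state.stage().end(), s) !=
           state.stage().end();
  };
  for (const auto& s : rule.stage())     if (!has_stage(s)) return false;  // needs ALL stages
  for (const auto& s : rule.not_stage()) if (has_stage(s))  return false;  // and NONE of these
  return true;
}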
-//
-// LayerParameter next available layer-specific ID: 151 (last added: smooth_l1_loss_param)
-message LayerParameter {
-  optional string name = 1; // the layer name
-  optional string type = 2; // the layer type
-  repeated string bottom = 3; // the name of each bottom blob
-  repeated string top = 4; // the name of each top blob
-
-  // The train / test phase for computation.
-  optional Phase phase = 10;
-
-  // The amount of weight to assign each top blob in the objective.
-  // Each layer assigns a default value, usually of either 0 or 1,
-  // to each top blob.
-  repeated float loss_weight = 5;
-
-  // Specifies training parameters (multipliers on global learning constants,
-  // and the name and other settings used for weight sharing).
-  repeated ParamSpec param = 6;
-
-  // The blobs containing the numeric parameters of the layer.
-  repeated BlobProto blobs = 7;
-
-  // Specifies whether to backpropagate to each bottom. If unspecified,
-  // Caffe will automatically infer whether each input needs backpropagation
-  // to compute parameter gradients. If set to true for some inputs,
-  // backpropagation to those inputs is forced; if set false for some inputs,
-  // backpropagation to those inputs is skipped.
-  //
-  // The size must be either 0 or equal to the number of bottoms.
-  repeated bool propagate_down = 11;
-
-  // Rules controlling whether and when a layer is included in the network,
-  // based on the current NetState. You may specify a non-zero number of rules
-  // to include OR exclude, but not both. If no include or exclude rules are
-  // specified, the layer is always included. If the current NetState meets
-  // ANY (i.e., one or more) of the specified rules, the layer is
-  // included/excluded.
-  repeated NetStateRule include = 8;
-  repeated NetStateRule exclude = 9;
-
-  // Parameters for data pre-processing.
-  optional TransformationParameter transform_param = 100;
-
-  // Parameters shared by loss layers.
-  optional LossParameter loss_param = 101;
-
-  // Layer type-specific parameters.
-  //
-  // Note: certain layers may have more than one computational engine
-  // for their implementation. These layers include an Engine type and
-  // engine parameter for selecting the implementation.
-  // The default for the engine is set by the ENGINE switch at compile-time.
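And the layer-level decision described above (the state meeting ANY include rule keeps the layer, meeting ANY exclude rule drops it, no rules means always included), reusing StateMeetsRule() from the previous sketch:

#include "caffe.pb.h"  // hypothetical generated header

bool StateMeetsRule(const domi::caffe::NetState&, const domi::caffe::NetStateRule&);

bool LayerIncluded(const domi::caffe::NetState& state,
                   const domi::caffe::LayerParameter& layer) {
  if (layer.include_size() > 0) {
    // Include rules win if ANY of them matches.
    for (const auto& rule : layer.include())
      if (StateMeetsRule(state, rule)) return true;
    return false;
  }
  // Exclude rules drop the layer if ANY of them matches.
  for (const auto& rule : layer.exclude())
    if (StateMeetsRule(state, rule)) return false;
  return true;
}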
- optional AccuracyParameter accuracy_param = 102; - optional ArgMaxParameter argmax_param = 103; - optional BatchNormParameter batch_norm_param = 139; - optional BiasParameter bias_param = 141; - optional ConcatParameter concat_param = 104; - optional ContrastiveLossParameter contrastive_loss_param = 105; - optional ConvolutionParameter convolution_param = 106; - optional CropParameter crop_param = 144; - optional DataParameter data_param = 107; - optional DetectionOutputParameter detection_output_param = 150; - optional DropoutParameter dropout_param = 108; - optional DummyDataParameter dummy_data_param = 109; - optional EltwiseParameter eltwise_param = 110; - optional ELUParameter elu_param = 140; - optional EmbedParameter embed_param = 137; - optional ExpParameter exp_param = 111; - optional FlattenParameter flatten_param = 135; - optional HDF5DataParameter hdf5_data_param = 112; - optional HDF5OutputParameter hdf5_output_param = 113; - optional HingeLossParameter hinge_loss_param = 114; - optional ImageDataParameter image_data_param = 115; - optional InfogainLossParameter infogain_loss_param = 116; - optional InnerProductParameter inner_product_param = 117; - optional InputParameter input_param = 143; - optional LogParameter log_param = 134; - optional LRNParameter lrn_param = 118; - optional MemoryDataParameter memory_data_param = 119; - optional MVNParameter mvn_param = 120; - optional ParameterParameter parameter_param = 145; - optional PoolingParameter pooling_param = 121; - optional PowerParameter power_param = 122; - optional PReLUParameter prelu_param = 131; - optional PythonParameter python_param = 130; - optional RecurrentParameter recurrent_param = 146; - optional ReductionParameter reduction_param = 136; - optional ReLUParameter relu_param = 123; - optional ReshapeParameter reshape_param = 133; - optional ScaleParameter scale_param = 142; - optional SigmoidParameter sigmoid_param = 124; - optional SmoothL1LossParameter smooth_l1_loss_param = 148; - optional SoftmaxParameter softmax_param = 125; - optional SPPParameter spp_param = 132; - optional SliceParameter slice_param = 126; - optional TanHParameter tanh_param = 127; - optional ThresholdParameter threshold_param = 128; - optional TileParameter tile_param = 138; - optional WindowDataParameter window_data_param = 129; - optional PermuteParameter permute_param = 202; - optional PriorBoxParameter prior_box_param = 203; - optional NormalizeParameter norm_param = 206; - optional PSROIPoolingParameter psroi_pooling_param = 207; - optional FreespaceExtractParameter freespace_extract_param = 151; - optional PostprocessParameter postprocess_param = 152; - optional SpatialTransformParameter spatial_transform_param = 153; - optional ROIAlignParameter roi_align_param = 154; - optional ReorgParameter reorg_param = 155; - optional RegionParameter region_param = 156; - optional ReverseParameter reverse_param = 157; - optional InterpParameter interp_param = 158; - optional ShuffleChannelParameter shuffle_channel_param = 159; - optional UpsampleParameter upsample_param = 160; - optional ROIPoolingParameter roi_pooling_param = 161; - optional YoloParameter yolo_param = 199; - optional YoloV3DetectionOutputParameter yolov3_detection_output_param = 200; - optional ProposalParameter proposal_param = 201; - optional FSRDetectionOutputParameter fsrdetectionoutput_param = 222; - optional SSDDetectionOutputParameter ssddetectionoutput_param = 232; - optional YoloV2DetectionOutputParameter yolov2_detection_output_param = 204; - optional 
QuantParameter quant_param = 208; - optional CondTakeParameter condtake_param = 233; - optional MatrixInverseParameter matrix_inverse_param = 210; - optional WarpPerspectiveParameter warp_perspective_param = 234; - optional BatchMatMulParameter batch_matmul_param = 235; - optional SpatialTransformerParameter st_param = 5000; - optional YoloV3DetectionOutputV2Parameter yolov3_detection_output_v2_param = 5001; -} - -// Message that stores parameters used to apply transformation -// to the data layer's data -message TransformationParameter { - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 1 [default = 1]; - // Specify if we want to randomly mirror data. - optional bool mirror = 2 [default = false]; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 3 [default = 0]; - // mean_file and mean_value cannot be specified at the same time - optional string mean_file = 4; - // if specified can be repeated once (would substract it from all the channels) - // or can be repeated the same number of times as channels - // (would subtract them from the corresponding channel) - repeated float mean_value = 5; - // Force the decoded image to have 3 color channels. - optional bool force_color = 6 [default = false]; - // Force the decoded image to have 1 color channels. - optional bool force_gray = 7 [default = false]; -} - -// Message that stores parameters shared by loss layers -message LossParameter { - // If specified, ignore instances with the given label. - optional int32 ignore_label = 1; - // How to normalize the loss for loss layers that aggregate across batches, - // spatial dimensions, or other dimensions. Currently only implemented in - // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. - enum NormalizationMode { - // Divide by the number of examples in the batch times spatial dimensions. - // Outputs that receive the ignore label will NOT be ignored in computing - // the normalization factor. - FULL = 0; - // Divide by the total number of output locations that do not take the - // ignore_label. If ignore_label is not set, this behaves like FULL. - VALID = 1; - // Divide by the batch size. - BATCH_SIZE = 2; - // Do not normalize the loss. - NONE = 3; - } - // For historical reasons, the default normalization for - // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. - optional NormalizationMode normalization = 3 [default = VALID]; - // Deprecated. Ignored if normalization is specified. If normalization - // is not specified, then setting this to false will be equivalent to - // normalization = BATCH_SIZE to be consistent with previous behavior. - optional bool normalize = 2; -} - -// Messages that store parameters used by individual layer types follow, in -// alphabetical order. - -message AccuracyParameter { - // When computing accuracy, count as correct by comparing the true label to - // the top k scoring classes. By default, only compare to the top scoring - // class (i.e. argmax). - optional uint32 top_k = 1 [default = 1]; - - // The "label" axis of the prediction blob, whose argmax corresponds to the - // predicted label -- may be negative to index from the end (e.g., -1 for the - // last axis). For example, if axis == 1 and the predictions are - // (N x C x H x W), the label blob is expected to contain N*H*W ground truth - // labels with integer values in {0, 1, ..., C-1}. 
-  optional int32 axis = 2 [default = 1];
-
-  // If specified, ignore instances with the given label.
-  optional int32 ignore_label = 3;
-}
-
-message ArgMaxParameter {
-  // If true produce pairs (argmax, maxval)
-  optional bool out_max_val = 1 [default = false];
-  optional uint32 top_k = 2 [default = 1];
-  // The axis along which to maximise -- may be negative to index from the
-  // end (e.g., -1 for the last axis).
-  // By default ArgMaxLayer maximizes over the flattened trailing dimensions
-  // for each index of the first / num dimension.
-  optional int32 axis = 3;
-}
-
-message ConcatParameter {
-  // The axis along which to concatenate -- may be negative to index from the
-  // end (e.g., -1 for the last axis). Other axes must have the
-  // same dimension for all the bottom blobs.
-  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
-  optional int32 axis = 2 [default = 1];
-
-  // DEPRECATED: alias for "axis" -- does not support negative indexing.
-  optional uint32 concat_dim = 1 [default = 1];
-}
-
-message BatchNormParameter {
-  // If false, normalization is performed over the current mini-batch
-  // and global statistics are accumulated (but not yet used) by a moving
-  // average.
-  // If true, those accumulated mean and variance values are used for the
-  // normalization.
-  // By default, it is set to false when the network is in the training
-  // phase and true when the network is in the testing phase.
-  optional bool use_global_stats = 1;
-  // What fraction of the moving average remains each iteration?
-  // Smaller values make the moving average decay faster, giving more
-  // weight to the recent values.
-  // Each iteration updates the moving average @f$S_{t-1}@f$ with the
-  // current mean @f$ Y_t @f$ by
-  // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$
-  // is the moving_average_fraction parameter.
-  optional float moving_average_fraction = 2 [default = .999];
-  // Small value to add to the variance estimate so that we don't divide by
-  // zero.
-  optional float eps = 3 [default = 1e-5];
-}
-
-message BiasParameter {
-  // The first axis of bottom[0] (the first input Blob) along which to apply
-  // bottom[1] (the second input Blob). May be negative to index from the end
-  // (e.g., -1 for the last axis).
-  //
-  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
-  // top[0] will have the same shape, and bottom[1] may have any of the
-  // following shapes (for the given value of axis):
-  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
-  //    (axis == 1 == -3) 3; 3x40; 3x40x60
-  //    (axis == 2 == -2) 40; 40x60
-  //    (axis == 3 == -1) 60
-  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
-  // "axis") -- a scalar bias.
-  optional int32 axis = 1 [default = 1];
-
-  // (num_axes is ignored unless just one bottom is given and the bias is
-  // a learned parameter of the layer. Otherwise, num_axes is determined by the
-  // number of axes by the second bottom.)
-  // The number of axes of the input (bottom[0]) covered by the bias
-  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
-  // Set num_axes := 0, to add a zero-axis Blob: a scalar.
-  optional int32 num_axes = 2 [default = 1];
-
-  // (filler is ignored unless just one bottom is given and the bias is
-  // a learned parameter of the layer.)
-  // The initialization for the learned bias parameter.
-  // Default is the zero (0) initialization, resulting in the BiasLayer
-  // initially performing the identity operation.
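The BatchNormParameter comment above gives the moving-average update in closed form; transcribed directly:

// Sketch only: S_t = (1 - beta) * Y_t + beta * S_{t-1}, where beta is
// moving_average_fraction (default 0.999), exactly as documented above.
inline double UpdateMovingAverage(double s_prev, double y_t,
                                  double moving_average_fraction = 0.999) {
  const double beta = moving_average_fraction;
  return (1.0 - beta) * y_t + beta * s_prev;
}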
- optional FillerParameter filler = 3; - optional bool bias_from_blob = 4 [default = true]; -} - -message ContrastiveLossParameter { - // margin for dissimilar pair - optional float margin = 1 [default = 1.0]; - // The first implementation of this cost did not exactly match the cost of - // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. - // legacy_version = false (the default) uses (margin - d)^2 as proposed in the - // Hadsell paper. New models should probably use this version. - // legacy_version = true uses (margin - d^2). This is kept to support / - // reproduce existing models and results - optional bool legacy_version = 2 [default = false]; -} - -message ConvolutionParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in all spatial dimensions, or once per spatial dimension. - repeated uint32 pad = 3; // The padding size; defaults to 0 - repeated uint32 kernel_size = 4; // The kernel size - repeated uint32 stride = 6; // The stride; defaults to 1 - // Factor used to dilate the kernel, (implicitly) zero-filling the resulting - // holes. (Kernel dilation is sometimes referred to by its use in the - // algorithme à trous from Holschneider et al. 1987.) - repeated uint32 dilation = 18; // The dilation; defaults to 1 - - // For 2D convolution only, the *_h and *_w versions may also be used to - // specify both spatial dimensions. - optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) - optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) - optional uint32 kernel_h = 11; // The kernel height (2D only) - optional uint32 kernel_w = 12; // The kernel width (2D only) - optional uint32 stride_h = 13; // The stride height (2D only) - optional uint32 stride_w = 14; // The stride width (2D only) - - optional uint32 group = 5 [default = 1]; // The group size for group conv - - optional FillerParameter weight_filler = 7; // The filler for the weight - optional FillerParameter bias_filler = 8; // The filler for the bias - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 15 [default = DEFAULT]; - - // The axis to interpret as "channels" when performing convolution. - // Preceding dimensions are treated as independent inputs; - // succeeding dimensions are treated as "spatial". - // With (N, C, H, W) inputs, and axis == 1 (the default), we perform - // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for - // groups g>1) filters across the spatial axes (H, W) of the input. - // With (N, C, D, H, W) inputs, and axis == 1, we perform - // N independent 3D convolutions, sliding (C/g)-channels - // filters across the spatial axes (D, H, W) of the input. - optional int32 axis = 16 [default = 1]; - - // Whether to force use of the general ND convolution, even if a specific - // implementation for blobs of the appropriate number of spatial dimensions - // is available. (Currently, there is only a 2D-specific convolution - // implementation; for input blobs with num_axes != 2, this option is - // ignored and the ND implementation will be used.) - optional bool force_nd_im2col = 17 [default = false]; -} - -message CropParameter { - // To crop, elements of the first bottom are selected to fit the dimensions - // of the second, reference bottom. 
The crop is configured by - // - the crop `axis` to pick the dimensions for cropping - // - the crop `offset` to set the shift for all/each dimension - // to align the cropped bottom with the reference bottom. - // All dimensions up to but excluding `axis` are preserved, while - // the dimensions including and trailing `axis` are cropped. - // If only one `offset` is set, then all dimensions are offset by this amount. - // Otherwise, the number of offsets must equal the number of cropped axes to - // shift the crop in each dimension accordingly. - // Note: standard dimensions are N,C,H,W so the default is a spatial crop, - // and `axis` may be negative to index from the end (e.g., -1 for the last - // axis). - optional int32 axis = 1 [default = 2]; - repeated uint32 offset = 2; -} - -message DataParameter { - enum DB { - LEVELDB = 0; - LMDB = 1; - } - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. - // DEPRECATED. Each solver accesses a different subset of the database. - optional uint32 rand_skip = 7 [default = 0]; - optional DB backend = 8 [default = LEVELDB]; - // DEPRECATED. See TransformationParameter. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationParameter. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - // Force the encoded image to have 3 color channels - optional bool force_encoded_color = 9 [default = false]; - // Prefetch queue (Increase if data feeding bandwidth varies, within the - // limit of device memory for GPU training) - optional uint32 prefetch = 10 [default = 4]; -} - -message DropoutParameter { - optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio - optional bool scale_train = 2 [default = true]; // scale train or test phase -} - -// DummyDataLayer fills any number of arbitrarily shaped blobs with random -// (or constant) data generated by "Fillers" (see "message FillerParameter"). -message DummyDataParameter { - // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N - // shape fields, and 0, 1 or N data_fillers. - // - // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. - // If 1 data_filler is specified, it is applied to all top blobs. If N are - // specified, the ith is applied to the ith top blob. - repeated FillerParameter data_filler = 1; - repeated BlobShape shape = 6; - - // 4D dimensions -- deprecated. Use "shape" instead. 
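The CropParameter above preserves every dimension before `axis` and crops every dimension from `axis` onward to the reference bottom. A shape-only sketch of that rule (offsets, which shift where the crop is taken, are left out):

#include <cstddef>
#include <vector>

std::vector<int> CropOutputShape(const std::vector<int>& bottom,
                                 const std::vector<int>& reference, int axis) {
  std::vector<int> top(bottom.size());
  for (std::size_t i = 0; i < bottom.size(); ++i) {
    // Dimensions before `axis` come from bottom[0]; the rest from the reference.
    top[i] = (static_cast<int>(i) < axis) ? bottom[i] : reference[i];
  }
  return top;
}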
- repeated uint32 num = 2; - repeated uint32 channels = 3; - repeated uint32 height = 4; - repeated uint32 width = 5; -} - -message EltwiseParameter { - enum EltwiseOp { - PROD = 0; - SUM = 1; - MAX = 2; - } - optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation - repeated float coeff = 2; // blob-wise coefficient for SUM operation - - // Whether to use an asymptotically slower (for >2 inputs) but stabler method - // of computing the gradient for the PROD operation. (No effect for SUM op.) - optional bool stable_prod_grad = 3 [default = true]; -} - -// Message that stores parameters used by ELULayer -message ELUParameter { - // Described in: - // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate - // Deep Network Learning by Exponential Linear Units (ELUs). arXiv - optional float alpha = 1 [default = 1]; -} - -// Message that stores parameters used by EmbedLayer -message EmbedParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - // The input is given as integers to be interpreted as one-hot - // vector indices with dimension num_input. Hence num_input should be - // 1 greater than the maximum possible input value. - optional uint32 input_dim = 2; - - optional bool bias_term = 3 [default = true]; // Whether to use a bias term - optional FillerParameter weight_filler = 4; // The filler for the weight - optional FillerParameter bias_filler = 5; // The filler for the bias - -} - -// Message that stores parameters used by ExpLayer -message ExpParameter { - // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. - // Or if base is set to the default (-1), base is set to e, - // so y = exp(shift + scale * x). - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -/// Message that stores parameters used by FlattenLayer -message FlattenParameter { - // The first axis to flatten: all preceding axes are retained in the output. - // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 1 [default = 1]; - - // The last axis to flatten: all following axes are retained in the output. - // May be negative to index from the end (e.g., the default -1 for the last - // axis). - optional int32 end_axis = 2 [default = -1]; -} - -// Message that stores parameters used by HDF5DataLayer -message HDF5DataParameter { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 2; - - // Specify whether to shuffle the data. - // If shuffle == true, the ordering of the HDF5 files is shuffled, - // and the ordering of data within any given HDF5 file is shuffled, - // but data between different files are not interleaved; all of a file's - // data are output (in a random order) before moving onto another file. - optional bool shuffle = 3 [default = false]; -} - -message HDF5OutputParameter { - optional string file_name = 1; -} - -message HingeLossParameter { - enum Norm { - L1 = 1; - L2 = 2; - } - // Specify the Norm to use L1 or L2 - optional Norm norm = 1 [default = L1]; -} - -message ImageDataParameter { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4 [default = 1]; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). 
Note that rand_skip should not - // be larger than the number of keys in the database. - optional uint32 rand_skip = 7 [default = 0]; - // Whether or not ImageLayer should shuffle the list of files at every epoch. - optional bool shuffle = 8 [default = false]; - // It will also resize images if new_height or new_width are not zero. - optional uint32 new_height = 9 [default = 0]; - optional uint32 new_width = 10 [default = 0]; - // Specify if the images are color or gray - optional bool is_color = 11 [default = true]; - // DEPRECATED. See TransformationParameter. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationParameter. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - optional string root_folder = 12 [default = ""]; -} - -message InfogainLossParameter { - // Specify the infogain matrix source. - optional string source = 1; - optional int32 axis = 2 [default = 1]; // axis of prob -} - -message InnerProductParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - optional FillerParameter weight_filler = 3; // The filler for the weight - optional FillerParameter bias_filler = 4; // The filler for the bias - - // The first axis to be lumped into a single inner product computation; - // all preceding axes are retained in the output. - // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 5 [default = 1]; - // Specify whether to transpose the weight matrix or not. - // If transpose == true, any operations will be performed on the transpose - // of the weight matrix. The weight matrix itself is not going to be transposed - // but rather the transfer flag of operations will be toggled accordingly. - optional bool transpose = 6 [default = false]; -} - -message InputParameter { - // This layer produces N >= 1 top blob(s) to be assigned manually. - // Define N shapes to set a shape for each top. - // Define 1 shape to set the same shape for every top. - // Define no shape to defer to reshaping manually. - repeated BlobShape shape = 1; -} - -// Message that stores parameters used by LogLayer -message LogParameter { - // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 
- // Or if base is set to the default (-1), base is set to e, - // so y = ln(shift + scale * x) = log_e(shift + scale * x) - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -// Message that stores parameters used by LRNLayer -message LRNParameter { - optional uint32 local_size = 1 [default = 5]; - optional float alpha = 2 [default = 1.]; - optional float beta = 3 [default = 0.75]; - enum NormRegion { - ACROSS_CHANNELS = 0; - WITHIN_CHANNEL = 1; - } - optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; - optional float k = 5 [default = 1.]; - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 6 [default = DEFAULT]; -} - -message MemoryDataParameter { - optional uint32 batch_size = 1; - optional uint32 channels = 2; - optional uint32 height = 3; - optional uint32 width = 4; -} - -message MVNParameter { - // This parameter can be set to false to normalize mean only - optional bool normalize_variance = 1 [default = true]; - - // This parameter can be set to true to perform DNN-like MVN - optional bool across_channels = 2 [default = false]; - - // Epsilon for not dividing by zero while normalizing variance - optional float eps = 3 [default = 1e-9]; -} - -message ParameterParameter { - optional BlobShape shape = 1; -} - -message PoolingParameter { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional PoolMethod pool = 1 [default = MAX]; // The pooling method - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) - optional uint32 pad_h = 9 [default = 0]; // The padding height - optional uint32 pad_w = 10 [default = 0]; // The padding width - optional uint32 kernel_size = 2; // The kernel size (square) - optional uint32 kernel_h = 5; // The kernel height - optional uint32 kernel_w = 6; // The kernel width - optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) - optional uint32 stride_h = 7; // The stride height - optional uint32 stride_w = 8; // The stride width - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 11 [default = DEFAULT]; - // If global_pooling then it will pool over the size of the bottom by doing - // kernel_h = bottom->height and kernel_w = bottom->width - optional bool global_pooling = 12 [default = false]; - optional bool ceil_mode = 13 [default = true]; - // How to calculate the output size - using ceil (default) or floor rounding. - enum RoundMode { - CEIL = 0; - FLOOR = 1; - } - optional RoundMode round_mode = 14 [default = CEIL]; -} - -message PowerParameter { - // PowerLayer computes outputs y = (shift + scale * x) ^ power. - optional float power = 1 [default = 1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -message PythonParameter { - optional string module = 1; - optional string layer = 2; - // This value is set to the attribute `param_str` of the `PythonLayer` object - // in Python before calling the `setup()` method. This could be a number, - // string, dictionary in Python dict format, JSON, etc. You may parse this - // string in `setup` method and use it in `forward` and `backward`. - optional string param_str = 3 [default = '']; - // Whether this PythonLayer is shared among worker solvers during data parallelism. 
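ExpParameter, LogParameter and PowerParameter above all document their elementwise forward formulas, with base = -1 selecting the natural base e. Transcribed into plain C++:

#include <cmath>

double ExpForward(double x, double base = -1.0, double scale = 1.0, double shift = 0.0) {
  double inner = shift + scale * x;
  return (base < 0) ? std::exp(inner) : std::pow(base, inner);  // y = base ^ (shift + scale * x)
}

double LogForward(double x, double base = -1.0, double scale = 1.0, double shift = 0.0) {
  double inner = shift + scale * x;
  return (base < 0) ? std::log(inner)
                    : std::log(inner) / std::log(base);  // y = log_base(shift + scale * x)
}

double PowerForward(double x, double power = 1.0, double scale = 1.0, double shift = 0.0) {
  return std::pow(shift + scale * x, power);  // y = (shift + scale * x) ^ power
}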
- // If true, each worker solver sequentially run forward from this layer. - // This value should be set true if you are using it as a data layer. - optional bool share_in_parallel = 4 [default = false]; -} - -// Message that stores parameters used by RecurrentLayer -message RecurrentParameter { - // The dimension of the output (and usually hidden state) representation -- - // must be explicitly set to non-zero. - optional uint32 num_output = 1 [default = 0]; - - optional FillerParameter weight_filler = 2; // The filler for the weight - optional FillerParameter bias_filler = 3; // The filler for the bias - - // Whether to enable displaying debug_info in the unrolled recurrent net. - optional bool debug_info = 4 [default = false]; - - // Whether to add as additional inputs (bottoms) the initial hidden state - // blobs, and add as additional outputs (tops) the final timestep hidden state - // blobs. The number of additional bottom/top blobs required depends on the - // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. - optional bool expose_hidden = 5 [default = false]; -} - -// Message that stores parameters used by ReductionLayer -message ReductionParameter { - enum ReductionOp { - SUM = 1; - ASUM = 2; - SUMSQ = 3; - MEAN = 4; - } - - optional ReductionOp operation = 1 [default = SUM]; // reduction operation - - // The first axis to reduce to a scalar -- may be negative to index from the - // end (e.g., -1 for the last axis). - // (Currently, only reduction along ALL "tail" axes is supported; reduction - // of axis M through N, where N < num_axes - 1, is unsupported.) - // Suppose we have an n-axis bottom Blob with shape: - // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). - // If axis == m, the output Blob will have shape - // (d0, d1, d2, ..., d(m-1)), - // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) - // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. - // If axis == 0 (the default), the output Blob always has the empty shape - // (count 1), performing reduction across the entire input -- - // often useful for creating new loss functions. - optional int32 axis = 2 [default = 0]; - - optional float coeff = 3 [default = 1.0]; // coefficient for output -} - -// Message that stores parameters used by ReLULayer -message ReLUParameter { - // Allow non-zero slope for negative inputs to speed up optimization - // Described in: - // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities - // improve neural network acoustic models. In ICML Workshop on Deep Learning - // for Audio, Speech, and Language Processing. - optional float negative_slope = 1 [default = 0]; - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 2 [default = DEFAULT]; -} - -message ReshapeParameter { - // Specify the output dimensions. If some of the dimensions are set to 0, - // the corresponding dimension from the bottom layer is used (unchanged). - // Exactly one dimension may be set to -1, in which case its value is - // inferred from the count of the bottom blob and the remaining dimensions. - // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: - // - // layer { - // type: "Reshape" bottom: "input" top: "output" - // reshape_param { ... 
} - // } - // - // If "input" is 2D with shape 2 x 8, then the following reshape_param - // specifications are all equivalent, producing a 3D blob "output" with shape - // 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } - // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } - // - optional BlobShape shape = 1; - - // axis and num_axes control the portion of the bottom blob's shape that are - // replaced by (included in) the reshape. By default (axis == 0 and - // num_axes == -1), the entire bottom blob shape is included in the reshape, - // and hence the shape field must specify the entire output shape. - // - // axis may be non-zero to retain some portion of the beginning of the input - // shape (and may be negative to index from the end; e.g., -1 to begin the - // reshape after the last axis, including nothing in the reshape, - // -2 to include only the last axis, etc.). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are all equivalent, - // producing a blob "output" with shape 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } - // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } - // - // num_axes specifies the extent of the reshape. - // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on - // input axes in the range [axis, axis+num_axes]. - // num_axes may also be -1, the default, to include all remaining axes - // (starting from axis). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are equivalent, - // producing a blob "output" with shape 1 x 2 x 8. - // - // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } - // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } - // reshape_param { shape { dim: 1 } num_axes: 0 } - // - // On the other hand, these would produce output blob shape 2 x 1 x 8: - // - // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } - // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } - // - optional int32 axis = 2 [default = 0]; - optional int32 num_axes = 3 [default = -1]; -} - - -message ScaleParameter { - // The first axis of bottom[0] (the first input Blob) along which to apply - // bottom[1] (the second input Blob). May be negative to index from the end - // (e.g., -1 for the last axis). - // - // For example, if bottom[0] is 4D with shape 100x3x40x60, the output - // top[0] will have the same shape, and bottom[1] may have any of the - // following shapes (for the given value of axis): - // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 - // (axis == 1 == -3) 3; 3x40; 3x40x60 - // (axis == 2 == -2) 40; 40x60 - // (axis == 3 == -1) 60 - // Furthermore, bottom[1] may have the empty shape (regardless of the value of - // "axis") -- a scalar multiplier. - optional int32 axis = 1 [default = 1]; - - // (num_axes is ignored unless just one bottom is given and the scale is - // a learned parameter of the layer. Otherwise, num_axes is determined by the - // number of axes by the second bottom.) - // The number of axes of the input (bottom[0]) covered by the scale - // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. - // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 
- optional int32 num_axes = 2 [default = 1]; - - // (filler is ignored unless just one bottom is given and the scale is - // a learned parameter of the layer.) - // The initialization for the learned scale parameter. - // Default is the unit (1) initialization, resulting in the ScaleLayer - // initially performing the identity operation. - optional FillerParameter filler = 3; - - // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but - // may be more efficient). Initialized with bias_filler (defaults to 0). - optional bool bias_term = 4 [default = false]; - optional FillerParameter bias_filler = 5; - optional bool scale_from_blob = 6 [default = true]; -} - -message SigmoidParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -message SliceParameter { - // The axis along which to slice -- may be negative to index from the end - // (e.g., -1 for the last axis). - // By default, SliceLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 3 [default = 1]; - repeated uint32 slice_point = 2; - - // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 slice_dim = 1 [default = 1]; -} - -message SmoothL1LossParameter { - // SmoothL1Loss(x) = - // 0.5 * (sigma * x) ** 2 -- if x < 1.0 / sigma / sigma - // |x| - 0.5 / sigma / sigma -- otherwise - optional float sigma = 1 [default = 1]; -} - -// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer -message SoftmaxParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; - - // The axis along which to perform the softmax -- may be negative to index - // from the end (e.g., -1 for the last axis). - // Any other axes will be evaluated as independent softmaxes. - optional int32 axis = 2 [default = 1]; -} - -message TanHParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -// Message that stores parameters used by TileLayer -message TileParameter { - // The index of the axis to tile. - optional int32 axis = 1 [default = 1]; - - // The number of copies (tiles) of the blob to output. - optional int32 tiles = 2; -} - -// Message that stores parameters used by ThresholdLayer -message ThresholdParameter { - optional float threshold = 1 [default = 0]; // Strictly positive values -} - -message WindowDataParameter { - // Specify the data source. - optional string source = 1; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // Specify the batch size. - optional uint32 batch_size = 4; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 5 [default = 0]; - // Specify if we want to randomly mirror data. 
- optional bool mirror = 6 [default = false]; - // Foreground (object) overlap threshold - optional float fg_threshold = 7 [default = 0.5]; - // Background (non-object) overlap threshold - optional float bg_threshold = 8 [default = 0.5]; - // Fraction of batch that should be foreground objects - optional float fg_fraction = 9 [default = 0.25]; - // Amount of contextual padding to add around a window - // (used only by the window_data_layer) - optional uint32 context_pad = 10 [default = 0]; - // Mode for cropping out a detection window - // warp: cropped window is warped to a fixed size and aspect ratio - // square: the tightest square around the window is cropped - optional string crop_mode = 11 [default = "warp"]; - // cache_images: will load all images in memory for faster access - optional bool cache_images = 12 [default = false]; - // append root_folder to locate images - optional string root_folder = 13 [default = ""]; -} - -message SPPParameter { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional uint32 pyramid_height = 1; - optional PoolMethod pool = 2 [default = MAX]; // The pooling method - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 6 [default = DEFAULT]; -} - -// DEPRECATED: use LayerParameter. -message V1LayerParameter { - repeated string bottom = 2; - repeated string top = 3; - optional string name = 4; - repeated NetStateRule include = 32; - repeated NetStateRule exclude = 33; - enum LayerType { - NONE = 0; - ABSVAL = 35; - ACCURACY = 1; - ARGMAX = 30; - BNLL = 2; - CONCAT = 3; - CONTRASTIVE_LOSS = 37; - CONVOLUTION = 4; - DATA = 5; - DECONVOLUTION = 39; - DROPOUT = 6; - DUMMY_DATA = 32; - EUCLIDEAN_LOSS = 7; - ELTWISE = 25; - EXP = 38; - FLATTEN = 8; - HDF5_DATA = 9; - HDF5_OUTPUT = 10; - HINGE_LOSS = 28; - IM2COL = 11; - IMAGE_DATA = 12; - INFOGAIN_LOSS = 13; - INNER_PRODUCT = 14; - LRN = 15; - MEMORY_DATA = 29; - MULTINOMIAL_LOGISTIC_LOSS = 16; - MVN = 34; - POOLING = 17; - POWER = 26; - RELU = 18; - SIGMOID = 19; - SIGMOID_CROSS_ENTROPY_LOSS = 27; - SILENCE = 36; - SOFTMAX = 20; - SOFTMAX_LOSS = 21; - SPLIT = 22; - SLICE = 33; - TANH = 23; - WINDOW_DATA = 24; - THRESHOLD = 31; - QUANT = 208; - DEQUANT = 209; - } - optional LayerType type = 5; - repeated BlobProto blobs = 6; - repeated string param = 1001; - repeated DimCheckMode blob_share_mode = 1002; - enum DimCheckMode { - STRICT = 0; - PERMISSIVE = 1; - } - repeated float blobs_lr = 7; - repeated float weight_decay = 8; - repeated float loss_weight = 35; - optional AccuracyParameter accuracy_param = 27; - optional ArgMaxParameter argmax_param = 23; - optional ConcatParameter concat_param = 9; - optional ContrastiveLossParameter contrastive_loss_param = 40; - optional ConvolutionParameter convolution_param = 10; - optional DataParameter data_param = 11; - optional DropoutParameter dropout_param = 12; - optional DummyDataParameter dummy_data_param = 26; - optional EltwiseParameter eltwise_param = 24; - optional ExpParameter exp_param = 41; - optional HDF5DataParameter hdf5_data_param = 13; - optional HDF5OutputParameter hdf5_output_param = 14; - optional HingeLossParameter hinge_loss_param = 29; - optional ImageDataParameter image_data_param = 15; - optional InfogainLossParameter infogain_loss_param = 16; - optional InnerProductParameter inner_product_param = 17; - optional LRNParameter lrn_param = 18; - optional MemoryDataParameter memory_data_param = 22; - optional MVNParameter mvn_param = 34; - optional PoolingParameter pooling_param = 19; - optional 
PowerParameter power_param = 21; - optional ReLUParameter relu_param = 30; - optional SigmoidParameter sigmoid_param = 38; - optional SoftmaxParameter softmax_param = 39; - optional SliceParameter slice_param = 31; - optional TanHParameter tanh_param = 37; - optional ThresholdParameter threshold_param = 25; - optional WindowDataParameter window_data_param = 20; - optional TransformationParameter transform_param = 36; - optional LossParameter loss_param = 42; - optional V0LayerParameter layer = 1; -} - -// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters -// in Caffe. We keep this message type around for legacy support. -message V0LayerParameter { - optional string name = 1; // the layer name - optional string type = 2; // the string to specify the layer type - - // Parameters to specify layers with inner products. - optional uint32 num_output = 3; // The number of outputs for the layer - optional bool biasterm = 4 [default = true]; // whether to have bias terms - optional FillerParameter weight_filler = 5; // The filler for the weight - optional FillerParameter bias_filler = 6; // The filler for the bias - - optional uint32 pad = 7 [default = 0]; // The padding size - optional uint32 kernelsize = 8; // The kernel size - optional uint32 group = 9 [default = 1]; // The group size for group conv - optional uint32 stride = 10 [default = 1]; // The stride - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional PoolMethod pool = 11 [default = MAX]; // The pooling method - optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio - - optional uint32 local_size = 13 [default = 5]; // for local response norm - optional float alpha = 14 [default = 1.]; // for local response norm - optional float beta = 15 [default = 0.75]; // for local response norm - optional float k = 22 [default = 1.]; - - // For data layers, specify the data source - optional string source = 16; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 17 [default = 1]; - optional string meanfile = 18; - // For data layers, specify the batch size. - optional uint32 batchsize = 19; - // For data layers, specify if we would like to randomly crop an image. - optional uint32 cropsize = 20 [default = 0]; - // For data layers, specify if we want to randomly mirror data. - optional bool mirror = 21 [default = false]; - - // The blobs containing the numeric parameters of the layer - repeated BlobProto blobs = 50; - // The ratio that is multiplied on the global learning rate. If you want to - // set the learning ratio for one blob, you need to set it for all blobs. - repeated float blobs_lr = 51; - // The weight decay that is multiplied on the global weight decay. - repeated float weight_decay = 52; - - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. 
- optional uint32 rand_skip = 53 [default = 0]; - - // Fields related to detection (det_*) - // foreground (object) overlap threshold - optional float det_fg_threshold = 54 [default = 0.5]; - // background (non-object) overlap threshold - optional float det_bg_threshold = 55 [default = 0.5]; - // Fraction of batch that should be foreground objects - optional float det_fg_fraction = 56 [default = 0.25]; - - // optional bool OBSOLETE_can_clobber = 57 [default = true]; - - // Amount of contextual padding to add around a window - // (used only by the window_data_layer) - optional uint32 det_context_pad = 58 [default = 0]; - - // Mode for cropping out a detection window - // warp: cropped window is warped to a fixed size and aspect ratio - // square: the tightest square around the window is cropped - optional string det_crop_mode = 59 [default = "warp"]; - - // For ReshapeLayer, one needs to specify the new dimensions. - optional int32 new_num = 60 [default = 0]; - optional int32 new_channels = 61 [default = 0]; - optional int32 new_height = 62 [default = 0]; - optional int32 new_width = 63 [default = 0]; - - // Whether or not ImageLayer should shuffle the list of files at every epoch. - // It will also resize images if new_height or new_width are not zero. - optional bool shuffle_images = 64 [default = false]; - - // For ConcatLayer, one needs to specify the dimension for concatenation, and - // the other dimensions must be the same for all the bottom blobs. - // By default it will concatenate blobs along the channels dimension. - optional uint32 concat_dim = 65 [default = 1]; - - optional HDF5OutputParameter hdf5_output_param = 1001; -} - -message PReLUParameter { - // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: - // Surpassing Human-Level Performance on ImageNet Classification, 2015. - - // Initial value of a_i. Default is a_i=0.25 for all i. - optional FillerParameter filler = 1; - // Whether or not slope parameters are shared across channels. - optional bool channel_shared = 2 [default = false]; -} - -// Message that stores parameters used by DetectionOutputLayer -//message DetectionOutputParameter { -// optional int32 num_classes = 1 [default = 21]; -// optional float nms_threshold = 2 [default = 0.3]; -// optional int32 top_k = 3; -// optional float confidence_threshold = 4 [default = 0.8]; -//} - -// Message that stores parameters used by PriorBoxLayer -message PriorBoxParameter { - // Encode/decode type. - enum CodeType { - CORNER = 1; - CENTER_SIZE = 2; - CORNER_SIZE = 3; - } - // Minimum box size (in pixels). Required! - repeated float min_size = 1; - // Maximum box size (in pixels). Required! - repeated float max_size = 2; - // Various aspect ratios. Duplicate ratios will be ignored. - // If none is provided, we use default ratio 1. - repeated float aspect_ratio = 3; - // If true, will flip each aspect ratio. - // For example, if there is aspect ratio "r", - // we will generate aspect ratio "1.0/r" as well. - optional bool flip = 4 [default = true]; - // If true, will clip the prior so that it is within [0, 1] - optional bool clip = 5 [default = false]; - // Variance for adjusting the prior bboxes. - repeated float variance = 6; - // By default, we calculate img_height, img_width, step_x, step_y based on - // bottom[0] (feat) and bottom[1] (img). Unless these values are explicitly - // provided. - // Explicitly provide the img_size. - optional uint32 img_size = 7; - // Either img_size or img_h/img_w should be specified; not both.
- optional uint32 img_h = 8; - optional uint32 img_w = 9; - - // Explicitly provide the step size. - optional float step = 10; - // Either step or step_h/step_w should be specified; not both. - optional float step_h = 11; - optional float step_w = 12; - - // Offset to the top left corner of each cell. - optional float offset = 13 [default = 0.5]; -} - -// Message that stores parameters used by PermuteLayer -message PermuteParameter { - // The new orders of the axes of data. Notice it should be within - // the same range as the input data, and it starts from 0. - // Do not provide repeated order. - repeated uint32 order = 1; -} - -message NormalizeParameter { - optional bool across_spatial = 1 [default = true]; - // Initial value of scale. Default is 1.0 for all - optional FillerParameter scale_filler = 2; - // Whether or not scale parameters are shared across channels. - optional bool channel_shared = 3 [default = true]; - // Epsilon for not dividing by zero while normalizing variance - optional float eps = 4 [default = 1e-10]; -} - -// needed by ssd -message SaveOutputParameter { - // Output directory. If not empty, we will save the results. - optional string output_directory = 1; - // Output name prefix. - optional string output_name_prefix = 2; - // Output format. - // VOC - PASCAL VOC output format. - // COCO - MS COCO output format. - optional string output_format = 3; - // If you want to output results, must also provide the following two files. - // Otherwise, we will ignore saving results. - // label map file. - optional string label_map_file = 4; - // A file which contains a list of names and sizes with same order - // of the input DB. The file is in the following format: - // name height width - // ... - optional string name_size_file = 5; - // Number of test images. It can be less than the lines specified in - // name_size_file. For example, when we only want to evaluate on part - // of the test images. - optional uint32 num_test_image = 6; - // The resize parameter used in saving the data. - // optional ResizeParameter resize_param = 7; -} - -message NonMaximumSuppressionParameter { - // Threshold to be used in nms. - optional float nms_threshold = 1 [default = 0.3]; - // Maximum number of results to be kept. - optional int32 top_k = 2; - // Parameter for adaptive nms.
- optional float eta = 3 [default = 1.0]; -} - -message GeneralNmsParameter { - optional int32 post_top_k = 1; - optional float nms_threshold = 2 [default = 0]; - optional float iou_threshold_decay = 3 [default = 1.0]; - optional float coor_scale_factor = 4 [default = 1.0]; -} - -// Message that stores parameters used by DetectionOutputLayer, ssd/fasterRcnn -message DetectionOutputParameter { - optional int32 num_classes = 1; - optional bool share_location = 2 [default = true]; - optional int32 background_label_id = 3 [default = 0]; - optional NonMaximumSuppressionParameter nms_param = 4; - optional SaveOutputParameter save_output_param = 5; - optional PriorBoxParameter.CodeType code_type = 6 [default = CENTER_SIZE]; - optional bool variance_encoded_in_target = 8 [default = true]; - optional int32 keep_top_k = 7; - optional float confidence_threshold = 9; - optional float nms_threshold = 13; - optional int32 top_k = 14; - optional int32 boxes = 15 [default = 1]; - optional bool relative = 17 [default = true]; - optional float objectness_threshold = 18 [default = 0.5]; - optional float class_threshold = 19 [default = 0.5]; - repeated float biases = 20; - optional GeneralNmsParameter general_nms_param = 21; - optional float objectness_score = 22; -} -message PSROIPoolingParameter { - required float spatial_scale = 1; - required int32 output_dim = 2; // output channel number - required int32 group_size = 3; // number of groups to encode position-sensitive score maps -} -// Message that stores parameters used by FreespaceExtractLayer -message FreespaceExtractParameter { - optional float org_height = 1; -} - -// Message that stores parameters used by DetectpostprocessLayer -message PostprocessParameter { - optional float nms_thresh = 1 [default = 0.3]; - optional float conf_thresh = 2 [default = 0.5]; - optional uint32 post_nms_topn = 3 [default = 100]; - optional uint32 cls_num = 4 [default = 12]; - repeated float bbox_reg_weights = 5; -} - -// Message that stores parameters used by SpatialTransformLayer -message SpatialTransformParameter { - optional uint32 output_h = 1 [default = 0]; - optional uint32 output_w = 2 [default = 0]; - optional float border_value = 3 [default = 0]; - repeated float affine_transform = 4; - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 15 [default = DEFAULT]; -} -message ROIAlignParameter { - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs.
- optional uint32 pooled_h = 1 [default = 0]; // The pooled output height - optional uint32 pooled_w = 2 [default = 0]; // The pooled output width - // Multiplicative spatial scale factor to translate ROI coords from their - // input scale to the scale used when pooling - optional float spatial_scale = 3 [default = 1]; - optional int32 sampling_ratio = 4 [default = -1]; - optional int32 roi_end_mode = 5 [default = 0]; -} - -message RegionParameter { - optional uint32 classes = 1 [default = 20]; // Category of classification - optional uint32 coords = 2 [default = 4]; // Coordinates of box - optional uint32 boxes = 3 [default = 1]; // Number of boxes predicted per grid - optional uint32 softmax = 4 [default = 0]; - optional string softmax_tree = 5 [default = ""]; - optional uint32 background = 6 [default = 0]; -} -message ReorgParameter{ - optional uint32 stride = 2 [default = 2]; - optional bool reverse = 1 [default = false]; -} -message ReverseParameter{ - repeated int32 axis = 1; -} -message InterpParameter{ - optional int32 height = 1 [default = 0];//Height of output - optional int32 width = 2 [default = 0];//Width of output - optional int32 zoom_factor = 3 [default = 1];//zoom factor - optional int32 shrink_factor = 4 [default = 1];//shrink factor - optional int32 pad_beg = 5 [default = 0];//padding at begin of input - optional int32 pad_end = 6 [default = 0];//padding at end of input -} -message ShuffleChannelParameter{ - optional uint32 group = 1[default = 1]; // The number of group -} -message UpsampleParameter{ - optional float scale = 1[default = 1]; - optional int32 stride = 2[default = 2]; - optional int32 stride_h = 3[default = 2]; - optional int32 stride_w = 4[default=2]; -} -message ROIPoolingParameter { - required int32 pooled_h = 1; - required int32 pooled_w = 2; - optional float spatial_scale = 3 [default=0.0625]; - optional float spatial_scale_h = 4; - optional float spatial_scale_w = 5; -} - -message YoloParameter { - optional int32 boxes = 1 [default = 3]; - optional int32 coords = 2 [default = 4]; - optional int32 classes = 3 [default = 80]; - optional string yolo_version = 4 [default = "V3"]; - optional bool softmax = 5 [default = false]; - optional bool background = 6 [default = false]; - optional bool softmaxtree = 7 [default = false]; -} - -message YoloV3DetectionOutputParameter { - optional int32 boxes = 1 [default = 3]; - optional int32 classes = 2 [default = 80]; - optional bool relative = 3 [default = true]; - optional float obj_threshold = 4 [default = 0.5]; - optional float score_threshold = 5 [default = 0.5]; - optional float iou_threshold = 6 [default = 0.45]; - optional int32 pre_nms_topn = 7 [default = 512]; - optional int32 post_nms_topn = 8 [default = 1024]; - repeated float biases_high = 9; - repeated float biases_mid = 10; - repeated float biases_low = 11; - optional int32 coords = 12 [default = 4]; - repeated float biases = 13; - optional bool resize_origin_img_to_net = 14 [default = false]; -} - -message YoloV3DetectionOutputV2Parameter { - optional int32 boxes = 1 [default = 3]; - optional int32 classes = 2 [default = 80]; - optional bool relative = 3 [default = true]; - optional float obj_threshold = 4 [default = 0.5]; - optional float score_threshold = 5 [default = 0.5]; - optional float iou_threshold = 6 [default = 0.45]; - optional int32 pre_nms_topn = 7 [default = 512]; - optional int32 post_nms_topn = 8 [default = 1024]; - repeated float biases_high = 9; - repeated float biases_mid = 10; - repeated float biases_low = 11; - optional int32 
coords = 12 [default = 4]; - repeated float biases = 13; - optional bool resize_origin_img_to_net = 14 [default = false]; - optional int32 out_box_dim = 15 [default = 3]; -} - -message ProposalParameter { - optional float feat_stride = 1 [default = 16]; - optional float base_size = 2 [default = 16]; - optional float min_size = 3 [default = 16]; - repeated float ratio = 4; - repeated float scale = 5; - optional int32 pre_nms_topn = 6 [default = 3000]; - optional int32 post_nms_topn = 7 [default = 304]; - optional float iou_threshold = 8 [default = 0.7]; - optional bool output_actual_rois_num = 9 [default = false]; -} - -message FSRDetectionOutputParameter { - required int32 num_classes = 1; - required float score_threshold = 2; - required float iou_threshold = 3; - optional int32 batch_rois = 4 [default = 1]; -} - -message SSDDetectionOutputParameter { - required int32 num_classes= 1 [default = 2]; - optional bool share_location = 2 [default = true]; - optional int32 background_label_id = 3 [default = 0]; - optional float iou_threshold = 4 [default = 0.3]; - optional int32 top_k = 5 [default = 200]; - optional float eta = 6 [default = 1.0]; - optional bool variance_encoded_in_target = 7 [default = false]; - optional int32 code_type = 8 [default = 1]; - optional int32 keep_top_k = 9 [default = -1]; - optional float confidence_threshold = 10 [default = 0.0]; -} -message YoloV2DetectionOutputParameter { - optional int32 boxes = 1 [default = 5]; - optional int32 classes = 2 [default = 80]; - optional bool relative = 3 [default = true]; - optional float obj_threshold = 4 [default = 0.5]; - optional float score_threshold = 5 [default = 0.5]; - optional float iou_threshold = 6 [default = 0.45]; - optional int32 pre_nms_topn = 7 [default = 512]; - optional int32 post_nms_topn = 8 [default = 1024]; - repeated float biases = 9; - optional int32 coords = 10 [default = 4]; - optional bool resize_origin_img_to_net = 11 [default = false]; -} - -message QuantParameter { - optional float scale = 2; - optional bytes offset = 3; -} - -message BatchMatMulParameter{ - optional bool adj_x1 = 1 [default = false]; - optional bool adj_x2 = 2 [default = false]; -} - -message CondTakeParameter { - required string mode = 1; - required float val = 2; - optional float eps = 3 [default = 1e-06]; -} - -message MatrixInverseParameter { - optional bool adjoint = 1 [default = false]; -} - -message WarpPerspectiveParameter { - required int32 out_height = 1; - required int32 out_width = 2; - optional float constant = 3; - optional string border_type = 4 [default = 'BORDER_CONSTANT']; -} - -message SpatialTransformerParameter { - // How to use the parameter passed by localisation network - optional string transform_type = 1 [default = "affine"]; - // What is the sampling technique - optional string sampler_type = 2 [default = "bilinear"]; - - // If not set,stay same with the input dimension H and W - optional int32 output_H = 3; - optional int32 output_W = 4; - // If false, only compute dTheta, DO NOT compute dU - optional bool to_compute_dU = 5 [default = true]; - - // The default value for some parameters - optional double theta_1_1 = 6; - optional double theta_1_2 = 7; - optional double theta_1_3 = 8; - optional double theta_2_1 = 9; - optional double theta_2_2 = 10; - optional double theta_2_3 = 11; -} diff --git a/ge/proto/dump_task.proto b/ge/proto/dump_task.proto deleted file mode 100644 index a2411ddb..00000000 --- a/ge/proto/dump_task.proto +++ /dev/null @@ -1,113 +0,0 @@ -syntax = "proto3"; -package toolkit.dump; - 
-enum OutputDataType { - DT_UNDEFINED = 0; - DT_FLOAT = 1; - DT_FLOAT16 = 2; - DT_INT8 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_UINT16 = 6; - DT_INT32 = 7; - DT_INT64 = 8; - DT_UINT32 = 9; - DT_UINT64 = 10; - DT_BOOL = 11; - DT_DOUBLE = 12; - DT_STRING = 13; - DT_DUAL_SUB_INT8 = 14; - DT_DUAL_SUB_UINT8 = 15; - DT_COMPLEX64 = 16; - DT_COMPLEX128 = 17; - DT_QINT8 = 18; - DT_QINT16 = 19; - DT_QINT32 = 20; - DT_QUINT8 = 21; - DT_QUINT16 = 22; - DT_RESOURCE = 23; - DT_STRING_REF = 24; - DT_DUAL = 25; - DT_VARIANT = 26; -} - -enum OutputFormat { - FORMAT_NCHW = 0; - FORMAT_NHWC = 1; - FORMAT_ND = 2; - FORMAT_NC1HWC0 = 3; - FORMAT_FRACTAL_Z = 4; - FORMAT_NC1C0HWPAD = 5; - FORMAT_NHWC1C0 = 6; - FORMAT_FSR_NCHW = 7; - FORMAT_FRACTAL_DECONV = 8; - FORMAT_C1HWNC0 = 9; - FORMAT_FRACTAL_DECONV_TRANSPOSE = 10; - FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11; - FORMAT_NC1HWC0_C04 = 12; - FORMAT_FRACTAL_Z_C04 = 13; - FORMAT_CHWN = 14; - FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15; - FORMAT_HWCN = 16; - FORMAT_NC1KHKWHWC0 = 17; - FORMAT_BN_WEIGHT = 18; - FORMAT_FILTER_HWCK = 19; - FORMAT_HASHTABLE_LOOKUP_LOOKUPS=20; - FORMAT_HASHTABLE_LOOKUP_KEYS = 21; - FORMAT_HASHTABLE_LOOKUP_VALUE = 22; - FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23; - FORMAT_HASHTABLE_LOOKUP_HITS=24; - FORMAT_C1HWNCoC0 = 25; - FORMAT_MD = 26; - FORMAT_NDHWC = 27; - FORMAT_FRACTAL_ZZ = 28; - FORMAT_FRACTAL_NZ = 29; - FORMAT_RESERVED = 30; -} - -message OriginalOp { - string name = 1; - uint32 output_index = 2; - OutputDataType data_type = 3; - OutputFormat format = 4; -} - -message Shape { - repeated uint64 dim = 1; -} - -message OpOutput { - OutputDataType data_type = 1; - OutputFormat format = 2; - Shape shape = 3; - OriginalOp original_op = 4; // the original op corresponding to the output - bytes data = 5; - uint64 size = 6; -} - -message OpInput { - OutputDataType data_type = 1; - OutputFormat format = 2; - Shape shape = 3; - bytes data = 4; - uint64 size = 5; -} - -enum BufferType { - L1 = 0; -} - -message OpBuffer { - BufferType buffer_type = 1; - bytes data = 2; - uint64 size = 3; -} - -message DumpData{ - string version = 1; - uint64 dump_time = 2; - repeated OpOutput output = 3; - repeated OpInput input = 4; - repeated OpBuffer buffer = 5; - string op_name = 6; -} diff --git a/ge/proto/fusion_model.proto b/ge/proto/fusion_model.proto deleted file mode 100755 index c92c5581..00000000 --- a/ge/proto/fusion_model.proto +++ /dev/null @@ -1,21 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -import "om.proto"; - -package domi; - -message FusionModelDef { - string version = 1; - repeated OpDef fusion_op = 2; -} \ No newline at end of file diff --git a/ge/proto/fwk_adapter.proto b/ge/proto/fwk_adapter.proto deleted file mode 100644 index 9335c926..00000000 --- a/ge/proto/fwk_adapter.proto +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package aicpu.FWKAdapter; -option cc_enable_arenas = true; - - -// Defines a struct for input and output. -message TensorDataInfo { - - // value DataType - uint32 dtype = 1; - - // shape dim - repeated int64 dim = 2; - - // data point addr - int64 data_addr = 3; -} - -message KernelRunParam { - // input - repeated TensorDataInfo input = 1; - // output - repeated TensorDataInfo output = 2; -} - diff --git a/ge/proto/ge_api.proto b/ge/proto/ge_api.proto deleted file mode 100755 index 331c5aea..00000000 --- a/ge/proto/ge_api.proto +++ /dev/null @@ -1,88 +0,0 @@ -syntax = "proto3"; -package ge.api_pb; - -import "ge_ir.proto"; - -// GE initialize -message GEInitialize { - map options = 1; -}; - -// initialize response -message GEInitializeResponse { - uint32 status = 1; - uint32 clientId = 2; -}; - -// GE finalize -message GEFinalize { - bool final = 1; - uint32 clientId = 2; -}; - -message GEFinalizeResponse { - uint32 status = 1; -}; - -// GE Session -message CreateSession{ - map options = 1; -}; - -message CreateSessionResponse { - uint32 status = 1; - uint64 sessionId = 2; -}; - -//GE AddGraph -//model serialize :: serializegraph -message SessionAddGraph{ - uint32 graphId = 1; - uint64 sessionId = 2; - ge.proto.GraphDef graph = 3; -}; - -message SessionAddGraphResponse { - uint32 status = 1; -}; - -//GE SessionRemoveGraph -message SessionRemoveGraph{ - uint32 graphId = 1; - uint64 sessionId = 2; -}; - -message SessionRemoveGraphResponse { - uint32 status = 1; -}; - -message SessionRunGraph{ - uint32 graphId = 1; - uint64 sessionId = 2; - repeated ge.proto.TensorDef tensor = 3; -}; - -message SessionBuildGraph{ - uint32 graphId = 1; - uint64 sessionId = 2; - repeated ge.proto.TensorDef tensor = 3; - string savePath = 4; -}; - -message SessionRunGraphResponse { - uint32 status = 1; - repeated ge.proto.TensorDef tensor = 2; -}; - -message SessionBuildGraphResponse { - uint32 status = 1; -}; - -message DestroySession{ - bool final = 1; - uint64 sessionId = 2; -}; - -message DestroySessionResponse { - uint32 status = 1; -}; diff --git a/ge/proto/ge_ir.proto b/ge/proto/ge_ir.proto deleted file mode 100644 index c0ef3071..00000000 --- a/ge/proto/ge_ir.proto +++ /dev/null @@ -1,193 +0,0 @@ -syntax = "proto3"; - -package ge.proto; - -enum DataType -{ - DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
- DT_FLOAT = 1; // float type - DT_FLOAT16 = 2; // fp16 type - DT_INT8 = 3; // int8 type - DT_UINT8 = 4; // uint8 type - DT_INT16 = 5; // int16 type - DT_UINT16 = 6; // uint16 type - DT_INT32 = 7; // - DT_INT64 = 8; // int64 type - DT_UINT32 = 9; // unsigned int32 - DT_UINT64 = 10; // unsigned int64 - DT_BOOL = 11; // bool type - DT_DOUBLE = 12; // double type - DT_STRING = 13; // string type - DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */ - DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */ - DT_COMPLEX64 = 16; // complex64 type - DT_COMPLEX128 = 17; // complex128 type - DT_QINT8 = 18; // qint8 type - DT_QINT16 = 19; // qint16 type - DT_QINT32 = 20; // qint32 type - DT_QUINT8 = 21; // quint8 type - DT_QUINT16 = 22; // quint16 type - DT_RESOURCE = 23; // resource type - DT_STRING_REF = 24; // string_ref type - DT_DUAL = 25; /**< dual output type */ - DT_VARIANT = 26; // variant type - DT_BF16 = 27; // bf16 type - DT_INT4 = 28; // int4 type -} - -message AttrDef -{ - message ListValue - { - enum ListValueType{ - VT_LIST_NONE = 0; - VT_LIST_STRING = 1; - VT_LIST_INT = 2; - VT_LIST_FLOAT = 3; - VT_LIST_BOOL = 4; - VT_LIST_BYTES = 5; - VT_LIST_TENSOR_DESC = 6; - VT_LIST_TENSOR = 7; - VT_LIST_GRAPH = 8; - VT_LIST_NAMED_ATTRS = 9; - VT_LIST_DATA_TYPE = 10; - } - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3; // "list(int)" - repeated float f = 4; // "list(float)" - repeated bool b = 5; // "list(bool)" - repeated bytes bt = 7; - repeated TensorDescriptor td = 8; - repeated TensorDef t = 9; - repeated GraphDef g = 10; - repeated NamedAttrs na = 11; - repeated int64 dt = 12; // list ge::DataType - - ListValueType val_type = 20; - } - - message ListListInt{ - message ListInt{ - repeated int64 list_i = 1; // list int - } - repeated ListInt list_list_i = 1; // list list int - } - - oneof value - { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; // Used to support attr nesting - TensorDescriptor td = 11; // GeTensorDesc type - TensorDef t = 12; // GeTensor type - GraphDef g = 13; // Graph type - ListListInt list_list_int = 14; // List List Int type - int64 dt = 15; // ge::DataType - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NamedAttrs -{ - string name = 1; - map attr = 2; -} - -// Shape / dimension description, using row-major order -message ShapeDef -{ - repeated int64 dim = 1; // Size of each dimension -} - -// Multidimensional data description -message TensorDescriptor -{ - string name = 1; // Optional parameter, tensor name - - DataType dtype = 2; // tensor datatype - ShapeDef shape = 3; // Shape / dimension - string layout = 4; // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND" - - bool has_out_attr = 9; - int64 size = 10; - int64 weight_size = 11; - bool reuse_input = 12; - bool output_tensor = 13; - string device_type = 14; - bool input_tensor =15; - int64 real_dim_cnt = 16; - int64 reuse_input_index = 17; - int64 data_offset = 18; - int64 cmps_size = 19; - string cmps_tab = 20; - int64 cmps_tab_offset = 21; - - map attr = 5; // Set of extra parameter fields -} - -// GeTensor definition -message TensorDef -{ - TensorDescriptor desc = 1; // Tensor description - bytes data = 2; // Tensor data -} - - -// Operator description -message OpDef -{ - string name = 1; // name - string type = 2; // type - - repeated string input = 5; // input original op name + outgoing index. op_name:index - - map attr = 10; // Set of operator parameter fields - - bool has_out_attr = 20; - int64 id = 21; - int64 stream_id =22; - repeated string input_name = 23; - repeated string src_name = 24; - repeated int64 src_index = 25; - repeated string dst_name = 26; - repeated int64 dst_index = 27; - repeated int64 input_i = 28; - repeated int64 output_i = 29; - repeated int64 workspace = 30; - repeated int64 workspace_bytes = 31; - repeated bool is_input_const = 32; - repeated TensorDescriptor input_desc = 33; - repeated TensorDescriptor output_desc = 34; - repeated string subgraph_name = 35; -} - -// Graph definition -message GraphDef -{ - string name = 1; // name - - repeated string input = 4; // Graph input - repeated string output = 5; // Graph output - - repeated OpDef op = 6; // List of operators - - map attr = 11; // Extended field -} - -// model definition -message ModelDef -{ - string name = 1; // name - uint32 version = 2; // IR Proto version - string custom_version = 3; // User model version number, passed in by user - - repeated GraphDef graph = 7; // Graph definition; graph[0] represents the main graph of the model - - map attr = 11; // Extended field -} - diff --git a/ge/proto/insert_op.proto b/ge/proto/insert_op.proto deleted file mode 100644 index 7d708865..00000000 --- a/ge/proto/insert_op.proto +++ /dev/null @@ -1,140 +0,0 @@ -syntax = "proto3"; - -package domi; - -message InsertNewOps { - repeated AippOpParams aipp_op = 1; - repeated MultiShapeOpParams multi_shape_op = 2; -} - -message AippOpParams { - enum InputFormat { - UNDEFINED = 0; - YUV420SP_U8 = 1; - XRGB8888_U8 = 2; - RGB888_U8 = 3; - YUV400_U8 = 4; - NC1HWC0DI_FP16 = 5; - NC1HWC0DI_S8 = 6; - ARGB8888_U8 = 7; - YUYV_U8 = 8; - YUV422SP_U8 = 9; - AYUV444_U8 = 10; - RAW10 = 11; - RAW12 = 12; - RAW16 = 13; - RAW24 = 14; - RGB16 = 15; - RGB20 = 16; - RGB24 = 17; - RGB8_IR = 18; - RGB16_IR = 19; - RGB24_IR = 20; - } - - enum AippMode { - undefined = 0; - static = 1; - dynamic = 2; - } - - // AIPP mode, distinguishing static AIPP from dynamic AIPP - AippMode aipp_mode = 1; - - // The related_input_rank parameter is required; it is an integer whose valid range is >= 0 and <= the number of input Data operators, with a default value of 0. - // It identifies which input of the model gets AIPP processing; for example, if the model has two inputs and AIPP is needed on the second input, configure related_input_rank as 1. - uint32 related_input_rank = 2; - - // related_input_name is optional; it is the top name of the Data node where AIPP is inserted -
string related_input_name = 6; - - // The input_edge_idx parameter is optional; it is an integer whose valid range is >= 0. - // It allows different AIPP processing for different outputs of the Data operator; if it is not configured, AIPP is applied by default to all output edges of the model input specified by related_input_rank. - // The configured value must be <= the number of output edges of the Data operator. - repeated uint32 input_edge_idx = 3; - - // [Begin] dynamic AIPP parameters; they take no effect when static AIPP is configured - uint32 max_src_image_size = 4; - - // Whether rotation is supported. Not supported by default; enabling rotation support incurs extra space and performance cost - bool support_rotation = 5; - - // [End] dynamic AIPP parameters - - - // [Begin] static AIPP parameters; they take no effect when dynamic AIPP is configured - InputFormat input_format = 51; - bool csc_switch = 52; - float cpadding_value = 53; - bool rbuv_swap_switch = 54; - bool ax_swap_switch = 55; - bool single_line_mode = 56; - - int32 src_image_size_w = 57; - int32 src_image_size_h = 58; - - bool crop = 59; - int32 load_start_pos_w = 60; - int32 load_start_pos_h = 61; - int32 crop_size_w = 62; - int32 crop_size_h = 63; - - bool resize = 64; - int32 resize_output_w = 65; - int32 resize_output_h = 66; - - bool padding = 67; - int32 left_padding_size = 68; - int32 right_padding_size = 69; - int32 top_padding_size = 70; - int32 bottom_padding_size = 71; - float padding_value = 72; - - int32 mean_chn_0 = 10; - int32 mean_chn_1 = 11; - int32 mean_chn_2 = 12; - int32 mean_chn_3 = 19; - float min_chn_0 = 13; - float min_chn_1 = 14; - float min_chn_2 = 15; - float min_chn_3 = 20; - repeated float var_reci_chn_0 = 16; - repeated float var_reci_chn_1 = 17; - repeated float var_reci_chn_2 = 18; - repeated float var_reci_chn_3 = 21; - - repeated int32 matrix_r0c0 = 30; - repeated int32 matrix_r0c1 = 31; - repeated int32 matrix_r0c2 = 32; - repeated int32 matrix_r1c0 = 33; - repeated int32 matrix_r1c1 = 34; - repeated int32 matrix_r1c2 = 35; - repeated int32 matrix_r2c0 = 36; - repeated int32 matrix_r2c1 = 37; - repeated int32 matrix_r2c2 = 38; - repeated int32 output_bias_0 = 39; - repeated int32 output_bias_1 = 40; - repeated int32 output_bias_2 = 41; - repeated int32 input_bias_0 = 42; - repeated int32 input_bias_1 = 43; - repeated int32 input_bias_2 = 44; - - // [End] static AIPP parameters - - // The exponent n used for transforming raw/rgbir data into f16. - // The transformation equation is x/(2^n). If set to 0, no transform is performed. - uint32 raw_rgbir_to_f16_n = 45; -} - -message MultiShapeOpParams { - enum MultiShapeMode { - batch = 0; // dynamic batch - resolution = 1; // dynamic resolution, reserved for extension - } - - MultiShapeMode mode = 1; // operator mode - uint32 related_input_rank = 2; // which model input the new operator is inserted on - - - repeated uint32 batch_list = 11; // batch_list values; the number of batch_list entries is between 2 and 8 -} diff --git a/ge/proto/om.proto b/ge/proto/om.proto deleted file mode 100644 index e15e5f80..00000000 --- a/ge/proto/om.proto +++ /dev/null @@ -1,396 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -enum TargetType -{ - MINI = 0; - TINY = 1; - LITE = 2; -} - -// offline model -message ModelDef { - string name = 1; - uint32 version = 2; - - uint64 memory_size = 10; - uint32 stream_num = 11; - uint32 event_num = 12; - uint64 weight_size = 13; - uint32 label_num = 15; - repeated OpDef op = 20; - TargetType target_type = 23; - - map attr = 30; -}; - -// operator define -message OpDef { - string name = 1; - string type = 2; - - uint32 id = 3; - uint32 stream_id = 4; - - repeated string input_name = 5; - - repeated string src_name = 8; - repeated int32 src_index = 9; - repeated int64 input = 10; - repeated int64 output = 11; - repeated TensorDescriptor input_desc = 12; - repeated TensorDescriptor output_desc = 13; - repeated WeightDef weights = 14; - repeated string dst_name = 15; - repeated int32 dst_index = 16; - - repeated int64 workspace = 20; - repeated uint32 workspace_bytes = 21; - - repeated string weight_name = 22; - repeated bool is_input_const = 23; - - map attr = 30; - - QuantizeFactorParams quantize_factor = 31; - - oneof op_params { - // start at 100 here - SendOpParams sender_param = 100; - RecvOpParams receiver_param = 200; - ConvolutionOpParams convolution_param = 300; - PoolingOpParams pooling_param = 400; - EltwiseOpParams eltwise_param = 500; - BatchNormOpParams batchnorm_param = 600; - ScaleOpParams scale_param = 700; - FullConnectionOpParams full_connection_param = 800; - SoftmaxOpParams softmax_param = 900; - ActivationOpParams activation_param = 1000; - ReshapeOpParams reshape_param = 1100; - } -}; - -message SendOpParams { - uint32 event_id = 1; -}; - -message RecvOpParams { - uint32 event_id = 1; -}; - -enum QuantizeScaleType -{ - VECTOR_SCALE = 0; - SCALAR_SCALE = 1; -} - -enum QuantizeScaleMode -{ - NORMAL_MODE = 0; - SQRT_MODE = 1; -} - -enum QuantizeAlgorithm -{ - NON_OFFSET_ALGO = 0; - HALF_OFFSET_ALGO = 1; - ALL_OFFSET_ALGO = 2; -} -message QuantizeFactor -{ - QuantizeScaleMode scale_mode = 1; - bytes scale_value = 2; - int64 scale_offset = 3; - bytes offset_data_value = 4; - int64 offset_data_offset = 5; - bytes offset_weight_value = 6; - int64 offset_weight_offset = 7; - bytes offset_pad_value = 8; - int64 offset_pad_offset = 9; -}; - -message QuantizeCalcFactor -{ - bytes offsetw = 1; - int64 offsetw_offset = 2; - bytes offsetd = 3; - int64 offsetd_offset = 4; - bytes scalereq = 5; - int64 scaledreq_offset = 6; - bytes offsetdnext = 7; - int64 offsetdnext_offset = 8; -} - -message QuantizeFactorParams -{ - QuantizeAlgorithm quantize_algo = 1; - QuantizeScaleType scale_type = 2; - QuantizeFactor quantize_param = 3; - QuantizeFactor dequantize_param = 4; - QuantizeFactor requantize_param = 5; - QuantizeCalcFactor quantizecalc_param = 6; -}; - -message ConvolutionOpParams { - int32 mode = 1; - int32 algo = 2; - int32 pad_mode = 3; - uint32 group = 4; - uint32 num_output = 5; - - repeated uint32 pad = 10; - repeated uint32 stride = 11; - repeated uint32 dilation = 12; - repeated uint32 kernel = 13; - - float alpha = 20; - float beta = 21; - - WeightDef filter = 40; - WeightDef bias = 41; - - bool relu_flag = 62; - repeated uint32 adj = 70; - repeated uint32 target_shape = 71; - repeated uint32 before_pad = 72; -}; - -message PoolingOpParams { - int32 mode = 1; - int32 nan_opt = 2; - int32 pad_mode = 3; - bool global_pooling = 4; - - repeated uint32 window = 10; - repeated uint32 pad = 11; - repeated uint32 stride = 12; - 
bool ceil_mode = 13; - int32 data_mode = 14; - - float alpha = 20; - float beta = 21; - repeated uint32 before_pad = 22; -}; - -message EltwiseOpParams { - int32 mode = 1; - repeated float coeff = 2; - float alpha = 3; - float beta = 4; - repeated WeightDef weight = 5; - bool relu_flag = 6; -}; - -message ActivationOpParams { - int32 mode = 1; - float coef = 2; - float alpha = 3; - float beta = 4; -}; - -message BatchNormOpParams { - int32 mode = 1; - - float alpha = 2; - float beta = 3; - double epsilon = 4; // optional, [default = 1e-5] - bool use_global_stats = 5; // optional, by default true, testing mode - float moving_average_fraction = 6; // optional, [default = .999]; - - WeightDef estimated_mean = 7; - WeightDef estimated_variance = 8; - - WeightDef scale = 9; - WeightDef bias = 10; -}; - -message ScaleOpParams { - WeightDef scale = 1; - WeightDef bias = 2; -}; - -message ReshapeOpParams { - float alpha = 1; - float beta = 2; - ShapeDef shape = 3; - int32 axis = 4; - int32 num_axes = 5; - int32 format = 6; -}; - -message SoftmaxOpParams { - int32 algo = 1; - int32 mode = 2; - float alpha = 3; - float beta = 4; -}; - -message FullConnectionOpParams { - WeightDef filter = 1; - WeightDef bias = 2; - uint32 num_output = 3; - bool relu_flag = 12; -}; - -message FlattenOpParams { - float alpha = 1; - float beta = 2; - int32 start_axis = 3; - int32 end_axis = 4; -} - -message AddLimitedOpParams { - float alpha = 1; - float beta = 2; - int32 axis = 3; - bool broadcast = 4; - - repeated WeightDef weight = 10; -}; - -message MulLimitedOpParams { - float alpha = 1; - float beta = 2; - int32 axis = 3; - bool broadcast = 4; - - repeated WeightDef weight = 10; -}; - -message AddOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message MulOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message SubOpParams { - float alpha = 1; - float beta = 2; - - repeated WeightDef weight = 10; -}; - -message BiasAddOpParams { - float alpha = 1; - float beta = 2; - - WeightDef bias = 10; -}; - -message MatMulOpParams { - float alpha = 1; - float beta = 2; - bool transposeX = 3; - bool transposeW = 4; - - WeightDef filter = 10; - WeightDef bias = 12; -}; - -message RsqrtOpParams { - float alpha = 1; - float beta = 2; -}; - - -message WeightDef { - int32 format = 1; - int32 data_type = 2; - ShapeDef shape = 3; - bytes data = 4; - int64 data_offset = 5; - uint32 cmps_size = 6; - bytes cmps_tab = 7; - int64 cmps_tab_offset = 10; - CompressInfo cmps_info = 8; - AllOffsetQuantizeInfo alloffset_quantize_info = 11; -} - -message ShapeDef { - repeated int64 dim = 1; -} - -enum DeviceType { - NPU = 0; // By default, we will use NPU.
- CPU = 1; // CPU -} - -message AllOffsetQuantizeInfo { - float scale = 1; - int32 offset = 2; -} - -message TensorDescriptor { - int32 format = 1; - int32 data_type = 2; - repeated int64 dim = 3; - uint32 size = 4; - bool reuse_input = 5; - bool output_tensor = 7; - DeviceType device_type = 8; - bool input_tensor = 9; - uint32 real_dim_cnt = 10; - uint32 reuse_input_index = 11; - AllOffsetQuantizeInfo alloffset_quantize_info = 12; -} - -message CompressInfo { - int32 blockRow = 1; // block row - int32 blockCol = 2; // block col - int32 fractalK = 3; // fractal K - int32 fractalN = 4; // fractal N - int32 lastFractalK = 5; // K of last fractal - int32 lastFractalN = 6; // N of last fractal - int32 cubeSize = 7; // cube's length - int32 loadDir = 8; // data load direction, 0: col load 1: row load -} - -message AttrDef { - message ListValue { - repeated string s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated uint32 u = 6 [packed = true]; // "list(uint)" - repeated bytes bt = 7; - } - - oneof value { - string s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - uint32 u = 6; // "uint32" - bytes bt = 7; - ListValue list = 1; // any "list(...)" - NamedAttrs func = 10; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. -message NamedAttrs { - string name = 1; - map attr = 2; -} - diff --git a/ge/proto/op_mapping.proto b/ge/proto/op_mapping.proto deleted file mode 100644 index d626eb49..00000000 --- a/ge/proto/op_mapping.proto +++ /dev/null @@ -1,75 +0,0 @@ -syntax = "proto3"; -package toolkit.aicpu.dump; - -message Shape { - repeated uint64 dim = 1; -} - -message Output { - int32 data_type = 1; - int32 format = 2; - Shape shape = 3; - uint64 address = 4; - string original_name = 5; - int32 original_output_index = 6; - int32 original_output_data_type = 7; - int32 original_output_format = 8; - uint64 size = 9; - Shape origin_shape = 10; -} - -message Input { - int32 data_type =1; - int32 format = 2; - Shape shape = 3; - uint64 address = 4; - uint64 size = 5; - Shape origin_shape = 6; -} - -enum BufferType { - L1 = 0; -} - -message OpBuffer { - BufferType buffer_type = 1; - uint64 address = 2; - uint64 size = 3; -} - -message Op { - string op_name = 1; - string op_type = 2; -} - -message Task { - uint32 task_id = 1; - uint32 stream_id = 2; - Op op = 3; - repeated Output output = 4; - bool end_graph = 5; - repeated Input input = 6; - repeated OpBuffer buffer = 7; -} - -message OpMappingInfo { - string dump_path = 1; - oneof model_name_param { - string model_name = 2; - } - oneof model_id_param { - uint32 model_id = 3; - } - oneof step_id { - uint64 step_id_addr = 4; - } - oneof iterations_per_loop { - uint64 iterations_per_loop_addr = 5; - } - oneof loop_cond { - uint64 loop_cond_addr = 6; - } - uint32 flag = 7; // 0x01 load, 0x00 unload - repeated Task task = 8; - string dump_step = 9; -} \ No newline at end of file diff --git a/ge/proto/optimizer_priority.proto b/ge/proto/optimizer_priority.proto deleted file mode 100644 index 769619cf..00000000 --- a/ge/proto/optimizer_priority.proto +++ /dev/null @@ -1,7 +0,0 @@ -syntax = "proto3"; -package ge.optimizers; - -// Default: GE>FE>AICPU -message Priority{ - repeated string optimizer = 1; -} \ No newline at end of file diff --git a/ge/proto/task.proto b/ge/proto/task.proto deleted file
mode 100644 index 0da5631e..00000000 --- a/ge/proto/task.proto +++ /dev/null @@ -1,179 +0,0 @@ -/* Copyright (C) 2018. Huawei Technologies Co., Ltd. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the Apache License Version 2.0.You may not use this file except in compliance with the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Apache License for more details at - * http://www.apache.org/licenses/LICENSE-2.0 - */ -syntax = "proto3"; - -package domi; - -message ModelTaskDef { - string version = 1; - - map attr = 9; // Extended field - repeated TaskDef task = 10; - - uint64 memory_size = 11; - uint32 stream_num = 12; - uint32 event_num = 13; - uint64 weight_size = 14; - - repeated bytes op = 15; // input/output opdef in bytes - - uint64 base_addr = 16; // base addr - uint64 weight_addr = 17; // weight addr - uint32 batch_num = 18; -} - - -message TaskDef { - uint32 id = 1; - uint32 type = 2; - - uint32 stream_id = 10; - uint32 event_id = 11; - - KernelDef kernel = 20; - KernelExDef kernel_ex = 21; - KernelHcclDef kernel_hccl = 25; - EventExDef event_ex = 26; - LogTimeStampDef log_timestamp = 28; - - uint32 label_id = 30; - - MemcpyAsyncDef memcpy_async = 31; - StreamSwitchDef stream_switch = 32; - StreamActiveDef stream_active = 33; - bytes private_def = 34; - uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future - StreamSwitchNDef stream_switch_n = 36; - - LabelSetDef label_set = 37; - LabelGotoExDef label_goto_ex = 38; - LabelSwitchByIndexDef label_switch_by_index = 39; - KernelDefWithHandle kernel_with_handle = 40; -} - -message KernelDef { - KernelContext context = 1; - - string stub_func = 10; - uint32 block_dim = 11; - uint32 args_size = 12; - bytes args = 13; - bytes sm_desc = 14; - bytes flowtable = 15; - string so_name = 16; - string kernel_name = 17; - bytes kernel_ext_info = 18; - uint32 kernel_ext_info_size = 19; -} - -message KernelDefWithHandle { - KernelContext context = 1; - - uint64 handle = 10; - string dev_func = 11; - uint32 block_dim = 12; - uint32 args_size = 13; - bytes args = 14; - bytes sm_desc = 15; - string original_kernel_key = 16; - string node_info = 17; -} - -message KernelContext { - uint32 kernel_type = 1; - uint32 op_id = 2; // OP type in CCE - uint32 kernel_func_id = 3; - uint32 op_index = 4; // TE/Custom operator - bool is_flowtable = 5; // Identify whether args is a flowtable structure - bytes args_offset = 6; // args offset information - uint32 args_count = 7; // args count - repeated uint32 origin_op_index = 8; -} - - -message KernelExDef { - uint32 flags = 1; - - uint32 op_index = 4; - uint32 args_size = 12; - bytes args = 13; - bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput - uint32 task_info_size = 15; - bytes kernel_ext_info = 16; - uint32 kernel_ext_info_size = 17; -} - - -message KernelHcclDef { - uint32 op_index = 8; - string hccl_type = 9; -} - - -message EventExDef { - uint32 op_index = 1; - uint32 event_type = 2; -} - -message LogTimeStampDef { - uint64 logid = 1; - bool notify = 2; - uint32 flat = 3; -} - -message MemcpyAsyncDef { - uint64 dst = 1; - uint64 dst_max = 2; - uint64 src = 3; - uint64 count = 4; - uint32 kind = 5; - uint32 op_index = 6; -} - -message StreamSwitchDef { - uint32 op_index = 1; - uint32 true_stream_id = 2; - int64 value = 3; - 
uint64 value_ptr = 4; - uint32 data_type = 5; -} - -message StreamActiveDef { - uint32 op_index = 1; - uint32 active_stream_id = 2; -} - -message StreamSwitchNDef { - uint32 op_index = 1; - uint32 size = 2; - repeated int64 target_value = 3; - repeated uint32 true_stream_id = 4; - uint32 element_size = 5; - uint32 data_type = 6; -} - -message LabelSetDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelGotoExDef { - uint32 op_index = 1; - uint32 label_id = 2; - uint32 model_id = 3; -} - -message LabelSwitchByIndexDef { - uint32 op_index = 1; - uint32 label_max = 2; -} diff --git a/ge/proto/tensorflow/attr_value.proto b/ge/proto/tensorflow/attr_value.proto deleted file mode 100644 index 438d7163..00000000 --- a/ge/proto/tensorflow/attr_value.proto +++ /dev/null @@ -1,70 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "AttrValueProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; - -// Protocol buffer representing the value for an attr used to configure an Op. -// Comment indicates the corresponding attr type. Only the field matching the -// attr type may be filled. -message AttrValue { - // LINT.IfChange - message ListValue { - repeated bytes s = 2; // "list(string)" - repeated int64 i = 3 [packed = true]; // "list(int)" - repeated float f = 4 [packed = true]; // "list(float)" - repeated bool b = 5 [packed = true]; // "list(bool)" - repeated DataType type = 6 [packed = true]; // "list(type)" - repeated TensorShapeProto shape = 7; // "list(shape)" - repeated TensorProto tensor = 8; // "list(tensor)" - repeated NameAttrList func = 9; // "list(attr)" - } - // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) - - oneof value { - bytes s = 2; // "string" - int64 i = 3; // "int" - float f = 4; // "float" - bool b = 5; // "bool" - DataType type = 6; // "type" - TensorShapeProto shape = 7; // "shape" - TensorProto tensor = 8; // "tensor" - ListValue list = 1; // any "list(...)" - - // "func" represents a function. func.name is a function's name or - // a primitive op's name. func.attr.first is the name of an attr - // defined for that function. func.attr.second is the value for - // that attr in the instantiation. - NameAttrList func = 10; - - // This is a placeholder only used in nodes defined inside a - // function. It indicates the attr value will be supplied when - // the function is instantiated. For example, let us suppose a - // node "N" in function "FN". "N" has an attr "A" with value - // placeholder = "foo". When FN is instantiated with attr "foo" - // set to "bar", the instantiated node N's attr A will have been - // given the value "bar". - string placeholder = 9; - } -} - -// A list of attr names and their values. The whole list is attached -// with a string name. E.g., MatMul[T=float]. 
-message NameAttrList { - string name = 1; - map attr = 2; -} diff --git a/ge/proto/tensorflow/function.proto b/ge/proto/tensorflow/function.proto deleted file mode 100644 index 44681e32..00000000 --- a/ge/proto/tensorflow/function.proto +++ /dev/null @@ -1,108 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "FunctionProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "attr_value.proto"; -import "node_def.proto"; -import "op_def.proto"; - -// A library is a set of named functions. -message FunctionDefLibrary { - repeated FunctionDef function = 1; - repeated GradientDef gradient = 2; -} - -// A function can be instantiated when the runtime can bind every attr -// with a value. When a GraphDef has a call to a function, it must -// have binding for every attr defined in the signature. -// * device spec, etc. -message FunctionDef { - // The definition of the function's name, arguments, return values, - // attrs etc. - OpDef signature = 1; - - // Attributes specific to this function definition. - map attr = 5; - - // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. - reserved 2; - - // In both of the following fields, there is the need to specify an - // output that is used as either the input to another node (in - // `node_def`) or as a return value of the function (in `ret`). - // Unlike the NodeDefs in GraphDef, we need to be able to specify a - // list in some cases (instead of just single outputs). Also, we - // need to be able to deal with lists of unknown length (so the - // output index may not be known at function definition time). So - // we use the following format instead: - // * "fun_in" where "fun_in" is the name of a function input arg in - // the `signature` field above. This represents that input, whether - // it is a single tensor or a list. - // * "fun_in:0" gives the first element of a function input arg (a - // non-list input is considered a list of length 1 for these - // purposes). - // * "node:out" where "node" is the name of a node in `node_def` and - // "out" is the name one of its op's output arguments (the name - // comes from the OpDef of the node's op). This represents that - // node's output, whether it is a single tensor or a list. - // Note: We enforce that an op's output arguments are never - // renamed in the backwards-compatibility test. - // * "node:out:0" gives the first element of a node output arg (a - // non-list output is considered a list of length 1 for these - // purposes). - // - // NOT CURRENTLY SUPPORTED (but may be in the future): - // * "node:out:-1" gives last element in a node output list - // * "node:out:1:" gives a list with all but the first element in a - // node output list - // * "node:out::-1" gives a list with all but the last element in a - // node output list - - // The body of the function. Unlike the NodeDefs in a GraphDef, attrs - // may have values of type `placeholder` and the `input` field uses - // the "output" format above. 
-
-  // By convention, "op" in node_def is resolved by consulting with a
-  // user-defined library first. If not resolved, "func" is assumed to
-  // be a builtin op.
-  repeated NodeDef node_def = 3;
-
-  // A mapping from the output arg names from `signature` to the
-  // outputs from `node_def` that should be returned by the function.
-  map<string, string> ret = 4;
-}
-
-// GradientDef defines the gradient function of a function defined in
-// a function library.
-//
-// A gradient function g (specified by gradient_func) for a function f
-// (specified by function_name) must follow the following:
-//
-// The function 'f' must be a numerical function which takes N inputs
-// and produces M outputs. Its gradient function 'g', which is a
-// function taking N + M inputs and produces N outputs.
-//
-// I.e. if we have
-//    (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),
-// then, g is
-//    (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,
-//                                       dL/dy1, dL/dy2, ..., dL/dy_M),
-// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
-// loss function). dL/dx_i is the partial derivative of L with respect
-// to x_i.
-message GradientDef {
-  string function_name = 1;  // The function name.
-  string gradient_func = 2;  // The gradient function's name.
-}
diff --git a/ge/proto/tensorflow/graph.proto b/ge/proto/tensorflow/graph.proto
deleted file mode 100644
index 73bfc6ee..00000000
--- a/ge/proto/tensorflow/graph.proto
+++ /dev/null
@@ -1,64 +0,0 @@
-/**
- * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
- *
- * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
- * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
- * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
- */
-
-syntax = "proto3";
-
-package domi.tensorflow;
-option cc_enable_arenas = true;
-option java_outer_classname = "GraphProtos";
-option java_multiple_files = true;
-option java_package = "org.tensorflow.framework";
-
-import "node_def.proto";
-import "function.proto";
-import "versions.proto";
-
-// Represents the graph of operations
-message GraphDef {
-  repeated NodeDef node = 1;
-
-  // Compatibility versions of the graph. See core/public/version.h for version
-  // history. The GraphDef version is distinct from the TensorFlow version, and
-  // each release of TensorFlow will support a range of GraphDef versions.
-  VersionDef versions = 4;
-
-  // Deprecated single version field; use versions above instead. Since all
-  // GraphDef changes before "versions" was introduced were forward
-  // compatible, this field is entirely ignored.
-  int32 version = 3 [deprecated = true];
-
-  // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
-  //
-  // "library" provides user-defined functions.
-  //
-  // Naming:
-  //   * library.function.name are in a flat namespace.
-  //     NOTE: We may need to change it to be hierarchical to support
-  //     different orgs. E.g.,
-  //     { "/google/nn", { ... }},
-  //     { "/google/vision", { ... }}
-  //     { "/org_foo/module_bar", { ... }}
-  //     map<string, FunctionDef> named_lib;
-  //   * If node[i].op is the name of one function in "library",
-  //     node[i] is deemed as a function call. Otherwise, node[i].op
-  //     must be a primitive operation supported by the runtime.
-  //
-  //
-  // Function call semantics:
-  //
-  //   * The callee may start execution as soon as some of its inputs
-  //     are ready.
The caller may want to use Tuple() mechanism to - // ensure all inputs are ready in the same time. - // - // * The consumer of return values may start executing as soon as - // the return values the consumer depends on are ready. The - // consumer may want to use Tuple() mechanism to ensure the - // consumer does not start until all return values of the callee - // function are ready. - FunctionDefLibrary library = 2; -}; diff --git a/ge/proto/tensorflow/graph_library.proto b/ge/proto/tensorflow/graph_library.proto deleted file mode 100644 index 7bca0838..00000000 --- a/ge/proto/tensorflow/graph_library.proto +++ /dev/null @@ -1,22 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; - -import "graph.proto"; - -message GeGraphDef { - string name = 1; - GraphDef graph = 2; -} - -message GraphDefLibrary { - repeated GeGraphDef graph_def = 1; -}; \ No newline at end of file diff --git a/ge/proto/tensorflow/node_def.proto b/ge/proto/tensorflow/node_def.proto deleted file mode 100644 index 50cf5cac..00000000 --- a/ge/proto/tensorflow/node_def.proto +++ /dev/null @@ -1,71 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "NodeProto"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "attr_value.proto"; - -message NodeDef { - // The name given to this operator. Used for naming inputs, - // logging, visualization, etc. Unique within a single GraphDef. - // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*". - string name = 1; - - // The operation name. There may be custom parameters in attrs. - // Op names starting with an underscore are reserved for internal use. - string op = 2; - - // Each input is "node:src_output" with "node" being a string name and - // "src_output" indicating which output tensor to use from "node". If - // "src_output" is 0 the ":0" suffix can be omitted. Regular inputs - // may optionally be followed by control inputs that have the format - // "^node". - repeated string input = 3; - - // A (possibly partial) specification for the device on which this - // node should be placed. 
-  // The expected syntax for this string is as follows:
-  //
-  // DEVICE_SPEC ::= PARTIAL_SPEC
-  //
-  // PARTIAL_SPEC ::= ("/" CONSTRAINT) *
-  // CONSTRAINT ::= ("job:" JOB_NAME)
-  //              | ("replica:" [1-9][0-9]*)
-  //              | ("task:" [1-9][0-9]*)
-  //              | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") )
-  //
-  // Valid values for this string include:
-  // * "/job:worker/replica:0/task:1/device:GPU:3"  (full specification)
-  // * "/job:worker/device:GPU:3"                   (partial specification)
-  // * ""                                           (no specification)
-  //
-  // If the constraints do not resolve to a single device (or if this
-  // field is empty or not present), the runtime will attempt to
-  // choose a device automatically.
-  string device = 4;
-
-  // Operation-specific graph-construction-time configuration.
-  // Note that this should include all attrs defined in the
-  // corresponding OpDef, including those with a value matching
-  // the default -- this allows the default to change and makes
-  // NodeDefs easier to interpret on their own. However, if
-  // an attr with a default is not specified in this list, the
-  // default will be used.
-  // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and
-  // one of the names from the corresponding OpDef's attr field).
-  // The values must have a type matching the corresponding OpDef
-  // attr's type field.
-  // Add some examples here showing best practices.
-  map<string, AttrValue> attr = 5;
-};
diff --git a/ge/proto/tensorflow/op_def.proto b/ge/proto/tensorflow/op_def.proto
deleted file mode 100644
index 7f0e8ce2..00000000
--- a/ge/proto/tensorflow/op_def.proto
+++ /dev/null
@@ -1,172 +0,0 @@
-/**
- * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
- *
- * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
- * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
- * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
- */
-
-syntax = "proto3";
-
-package domi.tensorflow;
-option cc_enable_arenas = true;
-option java_outer_classname = "OpDefProtos";
-option java_multiple_files = true;
-option java_package = "org.tensorflow.framework";
-
-import "attr_value.proto";
-import "types.proto";
-
-// Defines an operation. A NodeDef in a GraphDef specifies an Op by
-// using the "op" field which should match the name of a OpDef.
-// LINT.IfChange
-message OpDef {
-  // Op names starting with an underscore are reserved for internal use.
-  // Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*".
-  string name = 1;
-
-  // For describing inputs and outputs.
-  message ArgDef {
-    // Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*".
-    string name = 1;
-
-    // Human readable description.
-    string description = 2;
-
-    // Describes the type of one or more tensors that are accepted/produced
-    // by this input/output arg. The only legal combinations are:
-    // * For a single tensor: either the "type" field is set or the
-    //   "type_attr" field is set to the name of an attr with type "type".
-    // * For a sequence of tensors with the same type: the "number_attr"
-    //   field will be set to the name of an attr with type "int", and
-    //   either the "type" or "type_attr" field will be set as for
-    //   single tensors.
-    // * For a sequence of tensors, the "type_list_attr" field will be set
-    //   to the name of an attr with type "list(type)".
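// (A sketch of the three legal combinations above; all names are
// illustrative:
//   single tensor:        { name: "x"  type: DT_FLOAT }
//   homogeneous sequence: { name: "xs" type_attr: "T" number_attr: "N" }
//   heterogeneous list:   { name: "ys" type_list_attr: "Tlist" })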
- DataType type = 3; - string type_attr = 4; // if specified, attr must have type "type" - string number_attr = 5; // if specified, attr must have type "int" - // If specified, attr must have type "list(type)", and none of - // type, type_attr, and number_attr may be specified. - string type_list_attr = 6; - - // For inputs: if true, the inputs are required to be refs. - // By default, inputs can be either refs or non-refs. - // For outputs: if true, outputs are refs, otherwise they are not. - bool is_ref = 16; - }; - - // Description of the input(s). - repeated ArgDef input_arg = 2; - - // Description of the output(s). - repeated ArgDef output_arg = 3; - - // Description of the graph-construction-time configuration of this - // Op. That is to say, this describes the attr fields that will - // be specified in the NodeDef. - message AttrDef { - // A descriptive name for the argument. May be used, e.g. by the - // Python client, as a keyword argument name, and so should match - // the regexp "[a-z][a-z0-9_]+". - string name = 1; - - // One of the type names from attr_value.proto ("string", "list(string)", - // "int", etc.). - string type = 2; - - // A reasonable default for this attribute if the user does not supply - // a value. If not specified, the user must supply a value. - AttrValue default_value = 3; - - // Human-readable description. - string description = 4; - - - // --- Constraints --- - // These constraints are only in effect if specified. Default is no - // constraints. - - // For type == "int", this is a minimum value. For "list(___)" - // types, this is the minimum length. - bool has_minimum = 5; - int64 minimum = 6; - - // The set of allowed values. Has type that is the "list" version - // of the "type" field above (uses the "list" field of AttrValue). - // If type == "type" or "list(type)" above, then the "type" field - // of "allowed_values.list" has the set of allowed DataTypes. - // If type == "string" or "list(string)", then the "s" field of - // "allowed_values.list" has the set of allowed strings. - AttrValue allowed_values = 7; - } - repeated AttrDef attr = 4; - - // Optional deprecation based on GraphDef versions. - OpDeprecation deprecation = 8; - - // One-line human-readable description of what the Op does. - string summary = 5; - - // Additional, longer human-readable description of what the Op does. - string description = 6; - - // ------------------------------------------------------------------------- - // Which optimizations this operation can participate in. - - // True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs) - bool is_commutative = 18; - - // If is_aggregate is true, then this operation accepts N >= 2 - // inputs and produces 1 output all of the same type. Should be - // associative and commutative, and produce output with the same - // shape as the input. The optimizer may replace an aggregate op - // taking input from multiple devices with a tree of aggregate ops - // that aggregate locally within each device (and possibly within - // groups of nearby devices) before communicating. - bool is_aggregate = 16; // for things like add - - // Other optimizations go here, like - // can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc. - - // ------------------------------------------------------------------------- - // Optimization constraints. - - // Ops are marked as stateful if their behavior depends on some state beyond - // their input tensors (e.g. variable reading op) or if they have - // a side-effect (e.g. 
printing or asserting ops). Equivalently, stateless ops - // must always produce the same output for the same input and have - // no side-effects. - // - // By default Ops may be moved between devices. Stateful ops should - // either not be moved, or should only be moved if that state can also - // be moved (e.g. via some sort of save / restore). - // Stateful ops are guaranteed to never be optimized away by Common - // Subexpression Elimination (CSE). - bool is_stateful = 17; // for things like variables, queue - - // ------------------------------------------------------------------------- - // Non-standard options. - - // By default, all inputs to an Op must be initialized Tensors. Ops - // that may initialize tensors for the first time should set this - // field to true, to allow the Op to take an uninitialized Tensor as - // input. - bool allows_uninitialized_input = 19; // for Assign, etc. -}; -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc) - -// Information about version-dependent deprecation of an op -message OpDeprecation { - // First GraphDef version at which the op is disallowed. - int32 version = 1; - - // Explanation of why it was deprecated and what to use instead. - string explanation = 2; -}; - -// A collection of OpDefs -message OpList { - repeated OpDef op = 1; -}; diff --git a/ge/proto/tensorflow/resource_handle.proto b/ge/proto/tensorflow/resource_handle.proto deleted file mode 100644 index 91c46c9a..00000000 --- a/ge/proto/tensorflow/resource_handle.proto +++ /dev/null @@ -1,37 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "ResourceHandle"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// Protocol buffer representing a handle to a tensorflow resource. Handles are -// not valid across executions, but can be serialized back and forth from within -// a single run. -message ResourceHandleProto { - // Unique name for the device containing the resource. - string device = 1; - - // Container in which this resource is placed. - string container = 2; - - // Unique name of this resource. - string name = 3; - - // Hash code for the type of the resource. Is only valid in the same device - // and in the same execution. - uint64 hash_code = 4; - - // For debug-only, the name of the type pointed to by this handle, if - // available. - string maybe_type_name = 5; -}; diff --git a/ge/proto/tensorflow/tensor.proto b/ge/proto/tensorflow/tensor.proto deleted file mode 100644 index 48eeb6c4..00000000 --- a/ge/proto/tensorflow/tensor.proto +++ /dev/null @@ -1,102 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). 
- * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "TensorProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -import "resource_handle.proto"; -import "tensor_shape.proto"; -import "types.proto"; - -// Protocol buffer representing a tensor. -message TensorProto { - DataType dtype = 1; - - // Shape of the tensor. - TensorShapeProto tensor_shape = 2; - - // Only one of the representations below is set, one of "tensor_contents" and - // the "xxx_val" attributes. We are not using oneof because as oneofs cannot - // contain repeated fields it would require another extra set of messages. - - // Version number. - // - // In version 0, if the "repeated xxx" representations contain only one - // element, that element is repeated to fill the shape. This makes it easy - // to represent a constant Tensor with a single value. - int32 version_number = 3; - - // Serialized raw tensor content from either Tensor::AsProtoTensorContent or - // memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation - // can be used for all tensor types. The purpose of this representation is to - // reduce serialization overhead during RPC call by avoiding serialization of - // many repeated small items. - bytes tensor_content = 4; - - // Type specific representations that make it easy to create tensor protos in - // all languages. Only the representation corresponding to "dtype" can - // be set. The values hold the flattened representation of the tensor in - // row major order. - - // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll - // have some pointless zero padding for each value here. - repeated int32 half_val = 13 [packed = true]; - - // DT_FLOAT. - repeated float float_val = 5 [packed = true]; - - // DT_DOUBLE. - repeated double double_val = 6 [packed = true]; - - // DT_INT32, DT_INT16, DT_INT8, DT_UINT8. - repeated int32 int_val = 7 [packed = true]; - - // DT_STRING - repeated bytes string_val = 8; - - // DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real - // and imaginary parts of i-th single precision complex. - repeated float scomplex_val = 9 [packed = true]; - - // DT_INT64 - repeated int64 int64_val = 10 [packed = true]; - - // DT_BOOL - repeated bool bool_val = 11 [packed = true]; - - // DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real - // and imaginary parts of i-th double precision complex. - repeated double dcomplex_val = 12 [packed = true]; - - // DT_RESOURCE - repeated ResourceHandleProto resource_handle_val = 14; - - // DT_VARIANT - repeated VariantTensorDataProto variant_val = 15; - - // DT_UINT32 - repeated uint32 uint32_val = 16 [packed = true]; - - // DT_UINT64 - repeated uint64 uint64_val = 17 [packed = true]; -}; - -// Protocol buffer representing the serialization format of DT_VARIANT tensors. -message VariantTensorDataProto { - // Name of the type of objects being serialized. - string type_name = 1; - // Portions of the object that are not Tensors. - bytes metadata = 2; - // Tensors contained within objects being serialized. 
- repeated TensorProto tensors = 3; -} diff --git a/ge/proto/tensorflow/tensor_shape.proto b/ge/proto/tensorflow/tensor_shape.proto deleted file mode 100644 index 3a6d8c5a..00000000 --- a/ge/proto/tensorflow/tensor_shape.proto +++ /dev/null @@ -1,53 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -// Protocol buffer representing the shape of tensors. - -syntax = "proto3"; -option cc_enable_arenas = true; -option java_outer_classname = "TensorShapeProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -package domi.tensorflow; - -// Dimensions of a tensor. -message TensorShapeProto { - // One dimension of the tensor. - message Dim { - // Size of the tensor in that dimension. - // This value must be >= -1, but values of -1 are reserved for "unknown" - // shapes (values of -1 mean "unknown" dimension). Certain wrappers - // that work with TensorShapeProto may fail at runtime when deserializing - // a TensorShapeProto containing a dim value of -1. - int64 size = 1; - - // Optional name of the tensor dimension. - string name = 2; - }; - - // Dimensions of the tensor, such as {"input", 30}, {"output", 40} - // for a 30 x 40 2D tensor. If an entry has size -1, this - // corresponds to a dimension of unknown size. The names are - // optional. - // - // The order of entries in "dim" matters: It indicates the layout of the - // values in the tensor in-memory representation. - // - // The first entry in "dim" is the outermost dimension used to layout the - // values, the last entry is the innermost dimension. This matches the - // in-memory layout of RowMajor Eigen tensors. - // - // If "dim.size()" > 0, "unknown_rank" must be false. - repeated Dim dim = 2; - - // If true, the number of dimensions in the shape is unknown. - // - // If true, "dim.size()" must be 0. - bool unknown_rank = 3; -}; diff --git a/ge/proto/tensorflow/types.proto b/ge/proto/tensorflow/types.proto deleted file mode 100644 index f40e49cb..00000000 --- a/ge/proto/tensorflow/types.proto +++ /dev/null @@ -1,82 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "TypesProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// LINT.IfChange -enum DataType { - // Not a legal value for DataType. Used to indicate a DataType field - // has not been set. - DT_INVALID = 0; - - // Data types that all computation devices are expected to be - // capable to support. 
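// (Orientation note: each computation type below pairs with a type-specific
// TensorProto field from tensor.proto above, e.g. DT_FLOAT -> float_val,
// DT_INT32 -> int_val, DT_STRING -> string_val, DT_BOOL -> bool_val; the
// _REF variants further down are reference types used only for parameters.)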
- DT_FLOAT = 1; - DT_DOUBLE = 2; - DT_INT32 = 3; - DT_UINT8 = 4; - DT_INT16 = 5; - DT_INT8 = 6; - DT_STRING = 7; - DT_COMPLEX64 = 8; // Single-precision complex - DT_INT64 = 9; - DT_BOOL = 10; - DT_QINT8 = 11; // Quantized int8 - DT_QUINT8 = 12; // Quantized uint8 - DT_QINT32 = 13; // Quantized int32 - DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops. - DT_QINT16 = 15; // Quantized int16 - DT_QUINT16 = 16; // Quantized uint16 - DT_UINT16 = 17; - DT_COMPLEX128 = 18; // Double-precision complex - DT_HALF = 19; - DT_RESOURCE = 20; - DT_VARIANT = 21; // Arbitrary C++ data types - DT_UINT32 = 22; - DT_UINT64 = 23; - - // Do not use! These are only for parameters. Every enum above - // should have a corresponding value below (verified by types_test). - DT_FLOAT_REF = 101; - DT_DOUBLE_REF = 102; - DT_INT32_REF = 103; - DT_UINT8_REF = 104; - DT_INT16_REF = 105; - DT_INT8_REF = 106; - DT_STRING_REF = 107; - DT_COMPLEX64_REF = 108; - DT_INT64_REF = 109; - DT_BOOL_REF = 110; - DT_QINT8_REF = 111; - DT_QUINT8_REF = 112; - DT_QINT32_REF = 113; - DT_BFLOAT16_REF = 114; - DT_QINT16_REF = 115; - DT_QUINT16_REF = 116; - DT_UINT16_REF = 117; - DT_COMPLEX128_REF = 118; - DT_HALF_REF = 119; - DT_RESOURCE_REF = 120; - DT_VARIANT_REF = 121; - DT_UINT32_REF = 122; - DT_UINT64_REF = 123; -} -// LINT.ThenChange( -// https://www.tensorflow.org/code/tensorflow/c/c_api.h, -// https://www.tensorflow.org/code/tensorflow/go/tensor.go, -// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc, -// https://www.tensorflow.org/code/tensorflow/core/framework/types.h, -// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc, -// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py, -// https://www.tensorflow.org/code/tensorflow/python/framework/function.py) diff --git a/ge/proto/tensorflow/versions.proto b/ge/proto/tensorflow/versions.proto deleted file mode 100644 index 4e81548f..00000000 --- a/ge/proto/tensorflow/versions.proto +++ /dev/null @@ -1,39 +0,0 @@ -/** - * This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow - * - * This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model. - * This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). - * All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd. - */ - -syntax = "proto3"; - -package domi.tensorflow; -option cc_enable_arenas = true; -option java_outer_classname = "VersionsProtos"; -option java_multiple_files = true; -option java_package = "org.tensorflow.framework"; - -// Version information for a piece of serialized data -// -// There are different types of versions for each type of data -// (GraphDef, etc.), but they all have the same common shape -// described here. -// -// Each consumer has "consumer" and "min_producer" versions (specified -// elsewhere). A consumer is allowed to consume this data if -// -// producer >= min_producer -// consumer >= min_consumer -// consumer not in bad_consumers -// -message VersionDef { - // The version of the code that produced this data. - int32 producer = 1; - - // Any consumer below this version is not allowed to consume this data. - int32 min_consumer = 2; - - // Specific consumer versions which are disallowed (e.g. due to bugs). 
-  repeated int32 bad_consumers = 3;
-};

From 9476853d22cfe73ed8270b9aa0890e96c9ebb70c Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Thu, 17 Jun 2021 14:51:39 +0800
Subject: [PATCH 31/51] Adaptation rectification of op_tiling.

---
 ge/hybrid/node_executor/aicore/aicore_op_task.cc | 24 ++++++++++++------------
 ge/hybrid/node_executor/aicore/aicore_op_task.h  |  4 ++--
 ge/single_op/task/op_task.cc                     | 13 ++++++-------
 metadef                                          |  2 +-
 parser                                           |  2 +-
 5 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 8cd24bd1..76082cb3 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -25,7 +25,7 @@
 #include "single_op/task/build_task_utils.h"
 #include "single_op/task/tbe_task_builder.h"

-using optiling::OpRunInfo;
+using optiling::utils::OpRunInfo;

 namespace ge {
 namespace hybrid {
@@ -359,9 +359,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {
   GE_CHECK_NOTNULL(op_desc);
   GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str());

-  OpRunInfo tiling_info;
-  tiling_info.block_dim = -1;  // codex: Using uninitialized value
-  tiling_info.clear_atomic = true;
+  OpRunInfo tiling_info(-1, true, 0);

   auto execution_context = context.GetExecutionContext();

@@ -370,12 +368,14 @@
   RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] End");

   // update op args by tiling info
-  block_dim_ = static_cast<uint32_t>(tiling_info.block_dim);
-  op_desc->SetWorkspaceBytes(tiling_info.workspaces);
-  clear_atomic_ = tiling_info.clear_atomic;
-
-  tiling_data_ = tiling_info.tiling_data.str();
-  tiling_key_ = tiling_info.tiling_key;
+  block_dim_ = tiling_info.GetBlockDim();
+  clear_atomic_ = tiling_info.GetClearAtomic();
+  std::vector<int64_t> workspaces;
+  tiling_info.GetAllWorkspaces(workspaces);
+  op_desc->SetWorkspaceBytes(workspaces);
+
+  tiling_data_ = tiling_info.GetAllTilingData().str();
+  tiling_key_ = tiling_info.GetTilingKey();
   GELOGD("Successfully getting [tiling_key] : %u", tiling_key_);
   if (tiling_data_.empty()) {
     GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str());
@@ -412,7 +412,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {

 Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
   GELOGD("[%s] Start to invoke OpParaCalculate.", node->GetName().c_str());
-  GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
+  GE_CHK_STATUS_RET(optiling::OpParaCalculateV2(*node, tiling_info),
                     "[Invoke][OpParaCalculate]Failed calc tiling data of node %s.",
                     node->GetName().c_str());
   GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
@@ -633,7 +633,7 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co

 Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
   GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
-  GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info),
+  GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info),
                     "[Invoke][OpAtomicCalculate]Failed calc tiling data of node %s.",
                     node->GetName().c_str());
   GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 8d7b7f1e..3c8db8c9 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -85,7 +85,7 @@ class AiCoreOpTask {
   virtual std::string GetKeyForTvmMagic() const;
   virtual std::string GetKeyForTvmMetaData() const;
   virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const;
-  virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info);
+  virtual Status CalcTilingInfo(const NodePtr &node, optiling::utils::OpRunInfo &tiling_info);

   std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr;
   std::string tiling_data_;
@@ -130,7 +130,7 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask {
   std::string GetKeyForTvmMagic() const override;
   std::string GetKeyForTvmMetaData() const override;
   std::string GetKeyForKernelName(const OpDesc &op_desc) const override;
-  Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override;
+  Status CalcTilingInfo(const NodePtr &node, optiling::utils::OpRunInfo &tiling_info) override;

  private:
   Status InitAtomicAddrCleanIndices(const OpDesc &op_desc);
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index e48677f8..66d70e7e 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -224,18 +224,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
 Status TbeOpTask::UpdateRunInfo() {
   // invoke OpParaCalculate
   GELOGD("Start to invoke OpParaCalculate.");
-  optiling::OpRunInfo run_info;
-  run_info.block_dim = 0;
-  auto ret = optiling::OpParaCalculate(*node_, run_info);
+  optiling::utils::OpRunInfo run_info(0, true, 0);
+  auto ret = optiling::OpParaCalculateV2(*node_, run_info);
   if (ret != GRAPH_SUCCESS) {
     GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret);
     REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
-  block_dim_ = run_info.block_dim;
-  tiling_data_ = run_info.tiling_data.str();
-  tiling_key_ = run_info.tiling_key;
-  run_info_workspaces_ = run_info.workspaces;
+  block_dim_ = run_info.GetBlockDim();
+  tiling_data_ = run_info.GetAllTilingData().str();
+  tiling_key_ = run_info.GetTilingKey();
+  run_info.GetAllWorkspaces(run_info_workspaces_);
   GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
          tiling_data_.size(), tiling_key_);
   return SUCCESS;
diff --git a/metadef b/metadef
index 8c5fd448..e189fc7f 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 8c5fd4486f870d8b63213565aa39fdf1ba1e497a
+Subproject commit e189fc7f4da9f7714f009d70da4db627de17955d
diff --git a/parser b/parser
index 3073129b..db5ce472 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit 3073129b68c0fae12a8b7531d60782e39128a28c
+Subproject commit db5ce472de0086c3e2abdaab3b0685c1d2656c96

From bd1beee90c760f8d6a61b255bfa5c54a7939fd99 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Thu, 17 Jun 2021 15:41:02 +0800
Subject: [PATCH 32/51] Fix zip bug.
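
A sketch of the resulting packaging step, assuming GNU tar semantics (the
one-line flag change below is the whole fix; the commands are illustrative):

    tar -zcf graphengine_lib.tar lib   # create the archive gzip-compressed
    tar -ztf graphengine_lib.tar       # list it with the matching -z flag
    tar -zxf graphengine_lib.tar       # extract it the same way

The file keeps its .tar name even though the payload is now gzip-compressed.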
--- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 96c46e1a..bd471c99 100755 --- a/build.sh +++ b/build.sh @@ -371,6 +371,6 @@ elif [ "X$MINDSPORE_MODE" = "Xon" ] then cd "${OUTPUT_PATH}" find ./ -name graphengine_lib.tar -exec rm {} \; - tar -cf graphengine_lib.tar lib + tar -zcf graphengine_lib.tar lib fi echo "---------------- GraphEngine package archive generated ----------------" From fd51637c46ac0b2518e43b884a426e016ee198a4 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 17 Jun 2021 15:44:03 +0800 Subject: [PATCH 33/51] Fix zip bug. --- build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.sh b/build.sh index bd471c99..61f86945 100755 --- a/build.sh +++ b/build.sh @@ -355,13 +355,13 @@ generate_package() if [ "x${PLATFORM}" = "xtrain" ] then - tar -cf graphengine_lib.tar fwkacllib + tar -zcf graphengine_lib.tar fwkacllib elif [ "x${PLATFORM}" = "xinference" ] then - tar -cf graphengine_lib.tar acllib atc + tar -zcf graphengine_lib.tar acllib atc elif [ "x${PLATFORM}" = "xall" ] then - tar -cf graphengine_lib.tar fwkacllib acllib atc + tar -zcf graphengine_lib.tar fwkacllib acllib atc fi } From 6f130e22904dec2413815135520ce95f44d49f80 Mon Sep 17 00:00:00 2001 From: wangkai Date: Fri, 18 Jun 2021 10:37:55 +0800 Subject: [PATCH 34/51] add link header targets Signed-off-by: wangkai --- ge/common/CMakeLists.txt | 24 ++++++++++++++---------- ge/executor/CMakeLists.txt | 20 +++++++++++++------- 2 files changed, 27 insertions(+), 17 deletions(-) mode change 100644 => 100755 ge/executor/CMakeLists.txt diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 7974a46d..f55ff427 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -84,12 +84,11 @@ target_include_directories(ge_common PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - ${GE_DEPEND_DIR}/inc - ${GE_DEPEND_DIR}/inc/cce + $<$>:${GE_DEPEND_DIR}/inc> + $<$>:${GE_DEPEND_DIR}/inc/cce> #### blue zone #### - #${GE_DEPEND_DIR}/include - ${GE_CODE_DIR}/third_party/fwkacllib/inc - ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> ) target_link_options(ge_common PRIVATE @@ -98,6 +97,9 @@ target_link_options(ge_common PRIVATE target_link_libraries(ge_common PRIVATE $ + $<$>:$> + $<$>:$> + $<$>:$> static_mmpa -Wl,--no-as-needed graph @@ -151,16 +153,18 @@ target_include_directories(ge_common_static PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - ${GE_DEPEND_DIR}/inc - ${GE_DEPEND_DIR}/inc/cce + $<$>:${GE_DEPEND_DIR}/inc> + $<$>:${GE_DEPEND_DIR}/inc/cce> #### blue zone #### - #${GE_DEPEND_DIR}/include - ${GE_CODE_DIR}/third_party/fwkacllib/inc - ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> ) target_link_libraries(ge_common_static PRIVATE $ + $<$>:$> + $<$>:$> + $<$>:$> ascend_protobuf_static json c_sec diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt old mode 100644 new mode 100755 index b04216b8..b6342973 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -187,15 +187,18 @@ target_include_directories(ge_executor SYSTEM PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + 
$<$>:${GE_DEPEND_DIR}/inc> + $<$>:${GE_DEPEND_DIR}/inc/cce> #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc - ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> ) target_link_libraries(ge_executor PRIVATE $ + $<$>:$> + $<$>:$> + $<$>:$> json ascend_protobuf_static c_sec @@ -238,10 +241,10 @@ target_include_directories(ge_executor_shared PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - ${GE_CODE_DIR}/../inc - ${GE_CODE_DIR}/../inc/cce + $<$>:${GE_DEPEND_DIR}/inc> + $<$>:${GE_DEPEND_DIR}/inc/cce> #### blue zone #### - ${GE_CODE_DIR}/third_party/fwkacllib/inc + $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> ) target_link_options(ge_executor_shared PRIVATE @@ -251,6 +254,9 @@ target_link_options(ge_executor_shared PRIVATE target_link_libraries(ge_executor_shared PRIVATE $ + $<$>:$> + $<$>:$> + $<$>:$> -Wl,--no-as-needed ge_common runtime From 676ce23b556e20d8f49eabccc25e3ab51bf8803a Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Fri, 18 Jun 2021 11:03:51 +0800 Subject: [PATCH 35/51] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20ls?= =?UTF-8?q?=20:=20Adaptation=20rectification=20of=20op=5Ftiling.'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/hybrid/node_executor/aicore/aicore_op_task.cc | 24 ++++++++++++------------ ge/hybrid/node_executor/aicore/aicore_op_task.h | 4 ++-- ge/single_op/task/op_task.cc | 13 +++++++------ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 76082cb3..8cd24bd1 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -25,7 +25,7 @@ #include "single_op/task/build_task_utils.h" #include "single_op/task/tbe_task_builder.h" -using optiling::utils::OpRunInfo; +using optiling::OpRunInfo; namespace ge { namespace hybrid { @@ -359,7 +359,9 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { GE_CHECK_NOTNULL(op_desc); GELOGD("[%s] Start to update tiling info for task: [%s]", node->GetName().c_str(), stub_name_.c_str()); - OpRunInfo tiling_info(-1, true, 0); + OpRunInfo tiling_info; + tiling_info.block_dim = -1; // codex: Using uninitialized value + tiling_info.clear_atomic = true; auto execution_context = context.GetExecutionContext(); @@ -368,14 +370,12 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] End"); // update op args by tiling info - block_dim_ = tiling_info.GetBlockDim(); - clear_atomic_ = tiling_info.GetClearAtomic(); - std::vector workspaces; - tiling_info.GetAllWorkspaces(workspaces); - op_desc->SetWorkspaceBytes(workspaces); - - tiling_data_ = tiling_info.GetAllTilingData().str(); - tiling_key_ = tiling_info.GetTilingKey(); + block_dim_ = static_cast(tiling_info.block_dim); + op_desc->SetWorkspaceBytes(tiling_info.workspaces); + clear_atomic_ = tiling_info.clear_atomic; + + tiling_data_ = tiling_info.tiling_data.str(); + tiling_key_ = tiling_info.tiling_key; GELOGD("Successfully getting [tiling_key] : %u", tiling_key_); if (tiling_data_.empty()) { GELOGD("[%s] Tiling data is empty.", op_desc->GetName().c_str()); @@ -412,7 +412,7 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) { Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, 
OpRunInfo &tiling_info) {
   GELOGD("[%s] Start to invoke OpParaCalculate.", node->GetName().c_str());
-  GE_CHK_STATUS_RET(optiling::OpParaCalculateV2(*node, tiling_info),
+  GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
                     "[Invoke][OpParaCalculate]Failed calc tiling data of node %s.",
                     node->GetName().c_str());
   GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
@@ -633,7 +633,7 @@ std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) co

 Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
   GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
-  GE_CHK_STATUS_RET(optiling::OpAtomicCalculateV2(*node, tiling_info),
+  GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info),
                     "[Invoke][OpAtomicCalculate]Failed calc tiling data of node %s.",
                     node->GetName().c_str());
   GELOGD("[%s] Done invoking OpAtomicCalculate successfully.", node->GetName().c_str());
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 3c8db8c9..8d7b7f1e 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -85,7 +85,7 @@ class AiCoreOpTask {
   virtual std::string GetKeyForTvmMagic() const;
   virtual std::string GetKeyForTvmMetaData() const;
   virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const;
-  virtual Status CalcTilingInfo(const NodePtr &node, optiling::utils::OpRunInfo &tiling_info);
+  virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info);

   std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr;
   std::string tiling_data_;
@@ -130,7 +130,7 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask {
   std::string GetKeyForTvmMagic() const override;
   std::string GetKeyForTvmMetaData() const override;
   std::string GetKeyForKernelName(const OpDesc &op_desc) const override;
-  Status CalcTilingInfo(const NodePtr &node, optiling::utils::OpRunInfo &tiling_info) override;
+  Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override;

  private:
   Status InitAtomicAddrCleanIndices(const OpDesc &op_desc);
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index 66d70e7e..e48677f8 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -224,18 +224,17 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
 Status TbeOpTask::UpdateRunInfo() {
   // invoke OpParaCalculate
   GELOGD("Start to invoke OpParaCalculate.");
-  optiling::utils::OpRunInfo run_info(0, true, 0);
-  auto ret = optiling::OpParaCalculateV2(*node_, run_info);
+  optiling::OpRunInfo run_info;
+  run_info.block_dim = 0;
+  auto ret = optiling::OpParaCalculate(*node_, run_info);
   if (ret != GRAPH_SUCCESS) {
     GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Invoke][OpParaCalculate] failed, ret = %u.", ret);
     REPORT_INNER_ERROR("E19999", "invoke OpParaCalculate failed, ret = %u.", ret);
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
-  block_dim_ = run_info.GetBlockDim();
-  tiling_data_ = run_info.GetAllTilingData().str();
-  tiling_key_ = run_info.GetTilingKey();
-  run_info.GetAllWorkspaces(run_info_workspaces_);
+  block_dim_ = run_info.block_dim;
+  tiling_data_ = run_info.tiling_data.str();
+  tiling_key_ = run_info.tiling_key;
+  run_info_workspaces_ = run_info.workspaces;
   GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
          tiling_data_.size(), tiling_key_);
   return SUCCESS;

From c2a1076a8734a5dbb80f5336ee9bcd8b21bec817 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=82=A8=E6=98=9F?=
Date: Sat, 19 Jun 2021 15:15:51 +0800
Subject: [PATCH 36/51] Revert 'Pull Request !1784 : Create NodeExecute
 on-demand'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ge/hybrid/node_executor/node_executor.cc           |  75 ++++++++-------
 ge/hybrid/node_executor/node_executor.h            |   7 +-
 tests/ut/ge/CMakeLists.txt                         |   2 -
 .../hybrid/node_executor/node_executor_unittest.cc | 103 ---------------------
 4 files changed, 44 insertions(+), 143 deletions(-)
 delete mode 100644 tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc

diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc
index 04225557..5f3d6e45 100755
--- a/ge/hybrid/node_executor/node_executor.cc
+++ b/ge/hybrid/node_executor/node_executor.cc
@@ -58,8 +58,8 @@ Status NodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node,
 }

 Status NodeExecutorManager::EnsureInitialized() {
+  GE_CHK_STATUS_RET(InitializeExecutors());
   std::lock_guard<std::mutex> lk(mu_);
-  ++ref_count_;
   if (initialized_) {
     return SUCCESS;
   }
@@ -115,14 +115,17 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node
   return it->second;
 }

-Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) {
+Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) const {
   auto executor_type = ResolveExecutorType(node);
-  GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast<int>(executor_type));
   const auto it = executors_.find(executor_type);
   if (it == executors_.end()) {
-    return GetOrCreateExecutor(executor_type, executor);
+    REPORT_INNER_ERROR("E19999", "Failed to get executor by type: %d.", static_cast<int>(executor_type));
+    GELOGE(INTERNAL_ERROR, "[Check][ExecutorType]Failed to get executor by type: %d.",
+           static_cast<int>(executor_type));
+    return INTERNAL_ERROR;
   }

+  GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast<int>(executor_type));
   *executor = it->second.get();
   return SUCCESS;
 }
@@ -175,50 +178,51 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const {
   return OpsKernelBuilderManager::Instance().CalcOpRunningParam(node);
 }

-Status NodeExecutorManager::GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **out_executor) {
+Status NodeExecutorManager::InitializeExecutors() {
   std::lock_guard<std::mutex> lk(mu_);
-  const auto executor_it = executors_.find(executor_type);
-  if (executor_it != executors_.end()) {
-    *out_executor = executor_it->second.get();
+  if (executor_initialized_) {
+    ++ref_count_;
+    GELOGI("Executor is already initialized. add ref count to [%d]", ref_count_);
     return SUCCESS;
   }

-  GELOGI("Start to Initialize NodeExecutor, type = %d", static_cast<int>(executor_type));
-  auto it = builders_.find(executor_type);
-  if (it == builders_.end()) {
-    REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d",
-                      static_cast<int>(executor_type));
-    GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for executor type = %d", static_cast<int>(executor_type));
-    return INTERNAL_ERROR;
-  }
+  GELOGI("Start to Initialize NodeExecutors");
+  for (auto &it : builders_) {
+    auto engine_type = it.first;
+    auto build_fn = it.second;
+    GE_CHECK_NOTNULL(build_fn);
+    auto executor = std::unique_ptr<NodeExecutor>(build_fn());
+    if (executor == nullptr) {
+      REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for engine type = %d",
+                        static_cast<int>(engine_type));
+      GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast<int>(engine_type));
+      return INTERNAL_ERROR;
+    }

-  auto build_fn = it->second;
-  GE_CHECK_NOTNULL(build_fn);
-  auto executor = std::unique_ptr<NodeExecutor>(build_fn());
-  if (executor == nullptr) {
-    REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d",
-                      static_cast<int>(executor_type));
-    GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast<int>(executor_type));
-    return INTERNAL_ERROR;
-  }
+    GELOGD("Executor of engine type = %d was created successfully", static_cast<int>(engine_type));
+    auto ret = executor->Initialize();
+    if (ret != SUCCESS) {
+      REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast<int>(engine_type));
+      GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast<int>(engine_type));
+      for (auto &executor_it : executors_) {
+        executor_it.second->Finalize();
+      }
+      executors_.clear();
+      return ret;
+    }

-  GELOGD("Executor of engine type = %d was created successfully", static_cast<int>(executor_type));
-  auto ret = executor->Initialize();
-  if (ret != SUCCESS) {
-    REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast<int>(executor_type));
-    GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast<int>(executor_type));
-    return ret;
+    executors_.emplace(engine_type, std::move(executor));
   }

-  *out_executor = executor.get();
-  executors_.emplace(executor_type, std::move(executor));
-  GELOGI("Initializing NodeExecutor successfully, type = %d", static_cast<int>(executor_type));
+  ++ref_count_;
+  executor_initialized_ = true;
+  GELOGI("Initializing NodeExecutors successfully.");
   return SUCCESS;
 }

 void NodeExecutorManager::FinalizeExecutors() {
   std::lock_guard<std::mutex> lk(mu_);
-  if (ref_count_ <= 0) {
+  if (!executor_initialized_) {
     GELOGD("No need for finalizing for not initialized.");
     return;
   }
@@ -233,6 +237,7 @@ void NodeExecutorManager::FinalizeExecutors() {
     it.second->Finalize();
   }
   executors_.clear();
+  executor_initialized_ = false;
   GELOGD("Done invoking Finalize successfully.");
 }

diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h
index 97c9cee9..fffd4e7d 100644
--- a/ge/hybrid/node_executor/node_executor.h
+++ b/ge/hybrid/node_executor/node_executor.h
@@ -179,6 +179,8 @@ class NodeExecutorManager {
    */
   Status EnsureInitialized();

+  Status InitializeExecutors();
+
   void FinalizeExecutors();

   /**
@@ -194,7 +196,7 @@ class NodeExecutorManager {
    * @param executor executor
    * @return SUCCESS on success, error code otherwise
    */
-  Status GetExecutor(Node &node, const NodeExecutor **executor);
+  Status GetExecutor(Node &node, const NodeExecutor **executor) const;

   /**
    * Resolve executor type by node
@@ -204,13 +206,12 @@ class NodeExecutorManager {
   ExecutorType ResolveExecutorType(Node &node) const;

  private:
-  Status GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **executor);
-
   std::map<ExecutorType, std::unique_ptr<NodeExecutor>> executors_;
   std::map<ExecutorType, std::function<NodeExecutor *()>> builders_;
   std::map<std::string, ExecutorType> engine_mapping_;
   std::mutex mu_;
   bool initialized_ = false;
+  bool executor_initialized_ = false;
   int ref_count_ = 0;
 };

diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 631e18f8..8b024820 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -839,7 +839,6 @@ set(HYBRID_TEST_FILES
     "hybrid/executor/subgraph_executor_unittest.cc"
     "hybrid/executor/worker/execution_engine_unittest.cc"
     "hybrid/model/hybrid_model_builder_unittest.cc"
-    "hybrid/node_executor/node_executor_unittest.cc"
     "hybrid/node_executor/rts/rts_node_task_unittest.cc"
     "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc"
    "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc"
@@ -847,7 +846,6 @@ set(HYBRID_TEST_FILES
     "hybrid/executor/hybrid_model_async_executor_unittest.cc"
     "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc"
     "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc"
-
 )

 set(OTHERS_TEST_FILES
diff --git a/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc
deleted file mode 100644
index 8a1240d3..00000000
--- a/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-/**
- * Copyright 2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include -#include -#include - -#define private public -#define protected public -#include "hybrid/node_executor/node_executor.h" -#undef protected -#undef private - -using namespace std; -using namespace testing; - -namespace ge { -using namespace hybrid; - -namespace { - bool finalized = false; -} - -class NodeExecutorTest : public testing::Test { - protected: - void SetUp() {} - void TearDown() { } -}; - -class FailureNodeExecutor : public NodeExecutor { - public: - Status Initialize() override { - return INTERNAL_ERROR; - } -}; - -class SuccessNodeExecutor : public NodeExecutor { - public: - Status Initialize() override { - initialized = true; - finalized = false; - return SUCCESS; - } - - Status Finalize() override { - finalized = true; - } - - bool initialized = false; -}; - -REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, FailureNodeExecutor); -REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, SuccessNodeExecutor); - -TEST_F(NodeExecutorTest, TestGetOrCreateExecutor) { - auto &manager = NodeExecutorManager::GetInstance(); - const NodeExecutor *executor = nullptr; - Status ret = SUCCESS; - // no builder - ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::RESERVED, &executor); - ASSERT_EQ(ret, INTERNAL_ERROR); - // initialize failure - ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICORE, &executor); - ASSERT_EQ(ret, INTERNAL_ERROR); - ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); - ASSERT_EQ(ret, SUCCESS); - ASSERT_TRUE(executor != nullptr); - ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); - ASSERT_EQ(ret, SUCCESS); - ASSERT_TRUE(executor != nullptr); - ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized); -} - -TEST_F(NodeExecutorTest, TestInitAndFinalize) { - auto &manager = NodeExecutorManager::GetInstance(); - manager.FinalizeExecutors(); - manager.EnsureInitialized(); - manager.EnsureInitialized(); - const NodeExecutor *executor = nullptr; - auto ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor); - ASSERT_EQ(ret, SUCCESS); - ASSERT_TRUE(executor != nullptr); - ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized); - manager.FinalizeExecutors(); - ASSERT_FALSE(manager.executors_.empty()); - manager.FinalizeExecutors(); - ASSERT_TRUE(manager.executors_.empty()); - ASSERT_TRUE(finalized); -} -} // namespace ge From 65ba89fb34c03e9ec98647bcdde2f35034093f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Sat, 19 Jun 2021 16:15:43 +0800 Subject: [PATCH 37/51] update .gitmodules. 
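
With the submodule branches pinned below, both trees can be brought to the
tracked branch with, as a sketch assuming standard git submodule behaviour:

    git submodule sync
    git submodule update --init --remote metadef parser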
--- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index c6b7cc8e..a925a8b1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,8 +1,8 @@ [submodule "parser"] path = parser url = https://gitee.com/ascend/parser.git - branch = master + branch = r1.5.0 [submodule "metadef"] path = metadef url = https://gitee.com/ascend/metadef.git - branch = master + branch = r1.5.0 From a70ab7c91ac95ea77479c7a5afa6e5f650320831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B6=9B?= Date: Sat, 19 Jun 2021 16:40:53 +0800 Subject: [PATCH 38/51] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!1?= =?UTF-8?q?806=20:=20add=20header=20targets=20for=20link'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ge/common/CMakeLists.txt | 24 ++++++++++-------------- ge/executor/CMakeLists.txt | 20 +++++++------------- 2 files changed, 17 insertions(+), 27 deletions(-) mode change 100755 => 100644 ge/executor/CMakeLists.txt diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index f55ff427..7974a46d 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -84,11 +84,12 @@ target_include_directories(ge_common PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - $<$>:${GE_DEPEND_DIR}/inc> - $<$>:${GE_DEPEND_DIR}/inc/cce> + ${GE_DEPEND_DIR}/inc + ${GE_DEPEND_DIR}/inc/cce #### blue zone #### - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> + #${GE_DEPEND_DIR}/include + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_options(ge_common PRIVATE @@ -97,9 +98,6 @@ target_link_options(ge_common PRIVATE target_link_libraries(ge_common PRIVATE $ - $<$>:$> - $<$>:$> - $<$>:$> static_mmpa -Wl,--no-as-needed graph @@ -153,18 +151,16 @@ target_include_directories(ge_common_static PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - $<$>:${GE_DEPEND_DIR}/inc> - $<$>:${GE_DEPEND_DIR}/inc/cce> + ${GE_DEPEND_DIR}/inc + ${GE_DEPEND_DIR}/inc/cce #### blue zone #### - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> + #${GE_DEPEND_DIR}/include + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_libraries(ge_common_static PRIVATE $ - $<$>:$> - $<$>:$> - $<$>:$> ascend_protobuf_static json c_sec diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt old mode 100755 new mode 100644 index b6342973..b04216b8 --- a/ge/executor/CMakeLists.txt +++ b/ge/executor/CMakeLists.txt @@ -187,18 +187,15 @@ target_include_directories(ge_executor SYSTEM PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - $<$>:${GE_DEPEND_DIR}/inc> - $<$>:${GE_DEPEND_DIR}/inc/cce> + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain> + ${GE_CODE_DIR}/third_party/fwkacllib/inc + ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain ) target_link_libraries(ge_executor PRIVATE $ - $<$>:$> - $<$>:$> - $<$>:$> json ascend_protobuf_static c_sec @@ -241,10 +238,10 @@ target_include_directories(ge_executor_shared PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}/proto/graphengine_protos #### yellow zone #### - $<$>:${GE_DEPEND_DIR}/inc> - 
$<$>:${GE_DEPEND_DIR}/inc/cce> + ${GE_CODE_DIR}/../inc + ${GE_CODE_DIR}/../inc/cce #### blue zone #### - $<$:${GE_CODE_DIR}/third_party/fwkacllib/inc> + ${GE_CODE_DIR}/third_party/fwkacllib/inc ) target_link_options(ge_executor_shared PRIVATE @@ -254,9 +251,6 @@ target_link_options(ge_executor_shared PRIVATE target_link_libraries(ge_executor_shared PRIVATE $ - $<$>:$> - $<$>:$> - $<$>:$> -Wl,--no-as-needed ge_common runtime From 0b04317d23e4405c93a1eb2f5e80925fae758523 Mon Sep 17 00:00:00 2001 From: wangxiaotian22 Date: Tue, 15 Jun 2021 21:08:33 +0800 Subject: [PATCH 39/51] fix cmetric --- ge/common/util.cc | 82 +++++++--------------- ge/graph/load/model_manager/davinci_model.cc | 4 +- ge/graph/preprocess/insert_op/ge_aipp_op.cc | 2 +- ge/hybrid/node_executor/hccl/hccl_node_executor.cc | 3 +- ge/ir_build/option_utils.cc | 2 +- ge/offline/main.cc | 3 +- tests/depends/mmpa/src/mmpa_stub.cc | 7 ++ tests/ut/ge/CMakeLists.txt | 1 + tests/ut/ge/common/util_unittest.cc | 63 +++++++++++++++++ tests/ut/ge/graph/load/davinci_model_unittest.cc | 3 + tests/ut/ge/graph_ir/ge_ir_build_unittest.cc | 9 ++- 11 files changed, 115 insertions(+), 64 deletions(-) create mode 100644 tests/ut/ge/common/util_unittest.cc diff --git a/ge/common/util.cc b/ge/common/util.cc index 448efc0f..dfb5bac4 100644 --- a/ge/common/util.cc +++ b/ge/common/util.cc @@ -340,15 +340,24 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char return res; } +void PathValidErrReport(const std::string &file_path, const std::string &atc_param, const std::string &reason) { + if (!atc_param.empty()) { + REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), + std::vector<std::string>({atc_param, file_path, reason})); + } else { + REPORT_INNER_ERROR("E19999", "Path[%s] invalid, reason:%s", file_path.c_str(), reason.c_str()); + } +} + FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const std::string &file_path, const std::string &atc_param) { // The specified path is empty std::map<std::string, std::string> args_map; if (file_path.empty()) { - if (atc_param != "") { - ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); + if (!atc_param.empty()) { + REPORT_INPUT_ERROR("E10004", std::vector<std::string>({"parameter"}), std::vector<std::string>({atc_param})); } else { - REPORT_INNER_ERROR("E19999", "Param file_path is empty, check invalid"); + REPORT_INNER_ERROR("E19999", "Param file_path is empty, check invalid."); } GELOGW("Input parameter %s is empty.", file_path.c_str()); return false; @@ -356,13 +365,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const std::string real_path = RealPath(file_path.c_str()); // Unable to get absolute path (does not exist or does not have permission to access) if (real_path.empty()) { - if (atc_param != "") { - std::string reason = "realpath error, errmsg:" + std::string(strerror(errno)); - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, reason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s]'s realpath is empty, errmsg[%s]", file_path.c_str(), strerror(errno)); - } + std::string reason = "realpath error, errmsg:" + std::string(strerror(errno)); + PathValidErrReport(file_path, atc_param, reason); GELOGW("Path[%s]'s realpath is empty, errmsg[%s]", file_path.c_str(), strerror(errno)); return false; } @@ -378,23 +382,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( !ValidateStr(real_path, mode), -
if (atc_param != "") { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, real_path, kPathValidReason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] has invalid char, %s", file_path.c_str(), kPathValidReason); - } + PathValidErrReport(file_path, atc_param, kPathValidReason); return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); // The absolute path points to a file that is not readable if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) { - if (atc_param != "") { - std::string reason = "cat not access, errmsg:" + std::string(strerror(errno)); - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, reason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] can't acccess, errmsg:%s", file_path.c_str(), strerror(errno)); - } + PathValidErrReport(file_path, atc_param, "can not access, errmsg:" + std::string(strerror(errno))); GELOGW("Read file[%s] failed, errmsg[%s]", file_path.c_str(), strerror(errno)); return false; } @@ -406,10 +399,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const const std::string &atc_param) { // The specified path is empty if (file_path.empty()) { - if (atc_param != "") { - ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); + if (!atc_param.empty()) { + REPORT_INPUT_ERROR("E10004", std::vector<std::string>({"parameter"}), std::vector<std::string>({atc_param})); } else { - REPORT_INNER_ERROR("E19999", "Param file_path is empty, check invalid"); + REPORT_INNER_ERROR("E19999", "Param file_path is empty, check invalid."); } ErrorManager::GetInstance().ATCReportErrMessage("E10004", {"parameter"}, {atc_param}); GELOGW("Input parameter's value is empty."); return false; } GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH, - if (atc_param != "") { - std::string reason = "len is too long, it must be less than " + - std::to_string(MMPA_MAX_PATH); - ErrorManager::GetInstance().ATCReportErrMessage( - "E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, reason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] len is too long, it must be less than %d", - file_path.c_str(), MMPA_MAX_PATH); - } - return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), + std::string reason = "len is too long, it must be less than " + + std::to_string(MMPA_MAX_PATH); + PathValidErrReport(file_path, atc_param, reason); + return false, "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH); // A regular matching expression to verify the validity of the input file path @@ -441,12 +427,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( !ValidateStr(file_path, mode), - if (atc_param != "") { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, kPathValidReason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] has invalid char, %s", file_path.c_str(), kPathValidReason); - } + PathValidErrReport(file_path, atc_param, kPathValidReason); return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), file_path.c_str(), kPathValidReason); std::string real_path = RealPath(file_path.c_str()); @@ -454,13 +435,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool
CheckOutputPathValid(const if (!real_path.empty()) { // File is not readable or writable if (mmAccess2(real_path.c_str(), M_W_OK | M_F_OK) != EN_OK) { - if (atc_param != "") { - std::string reason = "cat not access, errmsg:" + std::string(strerror(errno)); - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, reason}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] can't acccess, errmsg:%s", file_path.c_str(), strerror(errno)); - } + PathValidErrReport(file_path, atc_param, "can not access, errmsg:" + std::string(strerror(errno))); GELOGW("Write file[%s] failed, errmsg[%s]", real_path.c_str(), strerror(errno)); return false; } @@ -479,12 +454,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const std::string prefix_path = std::string(file_path).substr(0, static_cast<size_t>(path_split_pos)); // Determine whether the specified path is valid by creating the path if (CreateDirectory(prefix_path) != 0) { - if (atc_param != "") { - ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, - {atc_param, file_path, "Can not create directory"}); - } else { - REPORT_INNER_ERROR("E19999", "Path[%s] Can not create directory", file_path.c_str()); - } + PathValidErrReport(file_path, atc_param, "Can not create directory"); GELOGW("Can not create directory[%s].", file_path.c_str()); return false; } diff --git a/ge/graph/load/model_manager/davinci_model.cc b/ge/graph/load/model_manager/davinci_model.cc index 5b67c205..929ae158 100755 --- a/ge/graph/load/model_manager/davinci_model.cc +++ b/ge/graph/load/model_manager/davinci_model.cc @@ -3463,11 +3463,11 @@ bool DavinciModel::CheckUserAndModelSize(const int64_t &size, const int64_t &op_ } // The input and model input size can not be exactly equal because user input is not definite.
if ((size + kDataMemAlignSizeCompare) < op_size) { - REPORT_INNER_ERROR("E19999", "%s size:%ld from user add align:%u < input_op_size:%ld in model, model_id:%u, " + REPORT_INNER_ERROR("E19999", "%s size:%ld from user add align:%u < op_size:%ld in model, model_id:%u, " "check invalid", input_or_output.c_str(), size, kDataMemAlignSizeCompare, op_size, model_id_); GELOGE(ACL_ERROR_GE_PARAM_INVALID, - "[Check][Param] %s size:%ld from user add align:%u < input_op_size:%ld in model, model_id:%u", + "[Check][Param] %s size:%ld from user add align:%u < op_size:%ld in model, model_id:%u", input_or_output.c_str(), size, kDataMemAlignSizeCompare, op_size, model_id_); return false; } diff --git a/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 5c191af7..2ea41b01 100755 --- a/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -114,7 +114,7 @@ Status GetDataDimN(const ge::NodePtr &data_node, ge::Format format, int64_t &bat std::vector<std::string>({ data_node->GetName() + " format", TypeUtils::FormatToSerialString(format), - "only format " + TypeUtils::FormatToSerialString(FORMAT_NCHW) + " and "+ + "only format " + TypeUtils::FormatToSerialString(FORMAT_NCHW) + " and " + TypeUtils::FormatToSerialString(FORMAT_NHWC) + " supported which dynamic aipp is linked"})); GELOGE(PARAM_INVALID, "[Check][Param] Not support data format:%s, node:%s", diff --git a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc index 72092cd8..d942695e 100644 --- a/ge/hybrid/node_executor/hccl/hccl_node_executor.cc +++ b/ge/hybrid/node_executor/hccl/hccl_node_executor.cc @@ -24,6 +24,7 @@ #include "graph/types.h" #include "hybrid/executor/hybrid_execution_context.h" #include "hccl/hcom.h" +#include "runtime/event.h" namespace ge { namespace { @@ -325,7 +326,7 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do rtEvent_t evt = nullptr; if (context.GetExecutionContext()->hccl_stream != nullptr) { - GE_CHK_RT_RET(rtEventCreateWithFlag(&evt, 0x01)); + GE_CHK_RT_RET(rtEventCreateWithFlag(&evt, RT_EVENT_WITH_FLAG)); GE_CHK_RT_RET(rtStreamWaitEvent(context.GetExecutionContext()->hccl_stream, evt)); } TaskContext *p_ctx = &context; diff --git a/ge/ir_build/option_utils.cc b/ge/ir_build/option_utils.cc index cecc2588..e2b08495 100755 --- a/ge/ir_build/option_utils.cc +++ b/ge/ir_build/option_utils.cc @@ -204,7 +204,7 @@ bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { GELOGE(ge::PARAM_INVALID, "[Check][DynamicImagesizeInputShape] input_format [%s] invalid, can not support now.", input_format.c_str()); - REPORT_INPUT_ERROR("E10003", std::vector<std::string>({"parameter","value","reason"}), + REPORT_INPUT_ERROR("E10003", std::vector<std::string>({"parameter", "value", "reason"}), std::vector<std::string>({"input_format", input_format, "this format is not support"})); return false; } diff --git a/ge/offline/main.cc b/ge/offline/main.cc index a1ae476b..14db1ded 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -953,8 +953,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output ge::Model load_model = ge::Model("loadmodel", "version2"); auto ret1 = load_model.LoadFromFile(FLAGS_model); if (ret1 != ge::GRAPH_SUCCESS) { - REPORT_INPUT_ERROR("E10041", std::vector<std::string>({"file"}), std::vector<std::string>({FLAGS_model})); - REPORT_CALL_ERROR("E19999", "load from model file:%s failed", FLAGS_model.c_str()); + REPORT_INPUT_ERROR("E10041",
std::vector({"parameter"}), std::vector({FLAGS_model})); DOMI_LOGE("Load model from %s failed, please check model file or " "input parameter[--framework] is correct", FLAGS_model.c_str()); (void)ge_generator.Finalize(); diff --git a/tests/depends/mmpa/src/mmpa_stub.cc b/tests/depends/mmpa/src/mmpa_stub.cc index a82621ef..aae8de9f 100644 --- a/tests/depends/mmpa/src/mmpa_stub.cc +++ b/tests/depends/mmpa/src/mmpa_stub.cc @@ -220,6 +220,13 @@ VOID mmScandirFree(mmDirent **entryList, INT32 count) INT32 mmAccess2(const CHAR *pathName, INT32 mode) { + if (pathName == NULL) { + return EN_INVALID_PARAM; + } + INT32 ret = access(pathName, mode); + if (ret != EN_OK) { + return EN_ERROR; + } return 0; } diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 63579109..b820e465 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -761,6 +761,7 @@ set(MULTI_PARTS_TEST_FILES "graph_ir/ge_ir_build_unittest.cc" "graph/transop_util_unittest.cc" "common/datatype_transfer_unittest.cc" + "common/util_unittest.cc" "common/dump_manager_unittest.cc" "common/dump_op_unittest.cc" "common/dump_exception_unittest.cc" diff --git a/tests/ut/ge/common/util_unittest.cc b/tests/ut/ge/common/util_unittest.cc new file mode 100644 index 00000000..6df3db96 --- /dev/null +++ b/tests/ut/ge/common/util_unittest.cc @@ -0,0 +1,63 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> + +#include "common/util.h" + +namespace ge { +namespace formats { +class UtestUtilTransfer : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + + +INT32 mmAccess2(const CHAR *pathName, INT32 mode) +{ + return -1; +} + +TEST_F(UtestUtilTransfer, CheckOutputPathValid) { + EXPECT_EQ(CheckOutputPathValid("", ""), false); + EXPECT_EQ(CheckOutputPathValid("", "model"), false); + + char max_file_path[14097] = {0}; + memset(max_file_path, 1, 14096); + EXPECT_EQ(CheckOutputPathValid(max_file_path, "model"), false); + + EXPECT_EQ(CheckOutputPathValid("$#%", ""), false); + + // system("touch test_util"); + // system("chmod 555 test_util"); + // EXPECT_EQ(CheckOutputPathValid("./test_util", ""), false); + // system("rm -r test_util"); +} + +TEST_F(UtestUtilTransfer, CheckInputPathValid) { + EXPECT_EQ(CheckInputPathValid("", ""), false); + EXPECT_EQ(CheckInputPathValid("", "model"), false); + + EXPECT_EQ(CheckInputPathValid("$#%", ""), false); + + EXPECT_EQ(CheckInputPathValid("./test_util", ""), false); + +} + +} +} + diff --git a/tests/ut/ge/graph/load/davinci_model_unittest.cc b/tests/ut/ge/graph/load/davinci_model_unittest.cc index 3f9cc850..378f2f07 100644 --- a/tests/ut/ge/graph/load/davinci_model_unittest.cc +++ b/tests/ut/ge/graph/load/davinci_model_unittest.cc @@ -1035,6 +1035,9 @@ TEST_F(UtestDavinciModel, NnExecute) { ProfilingManager::Instance().device_id_.emplace_back(0); model.task_list_.resize(1); EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS); + + input_data.blobs[0].length = 128; + EXPECT_NE(model.NnExecute(stream, false, input_data, output_data), SUCCESS); } TEST_F(UtestDavinciModel, update_io_addr_success) { DavinciModel model(0, nullptr); diff --git a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc index e14178d8..047c9e1d 100644 --- a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc +++ b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc @@ -368,7 +368,14 @@ TEST(UtestIrBuild, check_modify_mixlist_param) { {"ge.exec.modify_mixlist", "/modify.json"} }; ModelBufferData model; - + auto ret = aclgrphBuildModel(graph, build_options, model); EXPECT_EQ(ret, GRAPH_PARAM_INVALID); +} + +TEST(UtestIrCommon, check_dynamic_imagesize_input_shape_valid_format_empty) { + std::map<std::string, std::vector<int64_t>> shape_map; + std::string dynamic_image_size = ""; + bool ret = CheckDynamicImagesizeInputShapeValid(shape_map, "123", dynamic_image_size); + EXPECT_EQ(ret, false); } \ No newline at end of file From bba62ec5f3763c598219ec9e786080caa0713b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8D=8E?= Date: Mon, 21 Jun 2021 16:11:26 +0800 Subject: [PATCH 40/51] opt_info --- CMakeLists.txt | 1 + ge/common/CMakeLists.txt | 8 ++ ge/common/ge_opt_info.cc | 58 ++++++++++++ ge/common/ge_opt_info.h | 31 +++++++ ge/graph/manager/graph_manager.cc | 9 +- tests/CMakeLists.txt | 1 + tests/depends/opt_info/CMakeLists.txt | 37 ++++++++ tests/depends/opt_info/src/opt_info_stub.cc | 46 ++++++++++ tests/framework/cmake/graphengine.cmake | 2 + tests/st/testcase/test_ge_opt_info.cc | 123 ++++++++++++++++++++++++++ tests/ut/ge/CMakeLists.txt | 4 + tests/ut/ge/common/ge_opt_info_unittest.cc | 82 +++++++++++++++++ third_party/fwkacllib/inc/opt_info/opt_info.h | 34 +++++++ 13 files changed, 435 insertions(+), 1 deletion(-) create mode 100644 ge/common/ge_opt_info.cc create mode 100644 ge/common/ge_opt_info.h create mode 100644 tests/depends/opt_info/CMakeLists.txt create mode 100644 tests/depends/opt_info/src/opt_info_stub.cc create
mode 100644 tests/st/testcase/test_ge_opt_info.cc create mode 100644 tests/ut/ge/common/ge_opt_info_unittest.cc create mode 100644 third_party/fwkacllib/inc/opt_info/opt_info.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bed5b995..77a759ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,6 +95,7 @@ else () #find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) else() find_module(slog libalog.so ${ASCEND_ATC_DIR}) + find_module(opt_feature libopt_feature.so ${ASCEND_ATC_DIR}) find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) if(PLATFORM STREQUAL "train") find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 7974a46d..0569c91a 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -43,6 +43,7 @@ set(SRC_LIST "op/ge_op_utils.cc" "thread_pool.cc" "ge/tbe_plugin_manager.cc" + "ge_opt_info.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL) @@ -86,10 +87,12 @@ target_include_directories(ge_common PRIVATE #### yellow zone #### ${GE_DEPEND_DIR}/inc ${GE_DEPEND_DIR}/inc/cce + ${GE_DEPEND_DIR}/abl/licctrl #### blue zone #### #${GE_DEPEND_DIR}/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_common PRIVATE @@ -106,6 +109,7 @@ target_link_libraries(ge_common PRIVATE c_sec error_manager slog + opt_feature -Wl,--as-needed json $<$>:-lrt> @@ -153,10 +157,12 @@ target_include_directories(ge_common_static PRIVATE #### yellow zone #### ${GE_DEPEND_DIR}/inc ${GE_DEPEND_DIR}/inc/cce + ${GE_DEPEND_DIR}/abl/licctrl #### blue zone #### #${GE_DEPEND_DIR}/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_libraries(ge_common_static PRIVATE @@ -209,6 +215,7 @@ target_include_directories(ge_common PRIVATE ${CMAKE_BINARY_DIR}/proto/graphengine_protos ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_common PRIVATE @@ -224,6 +231,7 @@ target_link_libraries(ge_common PRIVATE c_sec error_manager slog + opt_feature static_mmpa -Wl,--as-needed json diff --git a/ge/common/ge_opt_info.cc b/ge/common/ge_opt_info.cc new file mode 100644 index 00000000..c6bac480 --- /dev/null +++ b/ge/common/ge_opt_info.cc @@ -0,0 +1,58 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common/ge_opt_info.h" + +#include <map> +#include <string> +#include "graph/ge_local_context.h" +#include "ge/ge_api_types.h" +#include "common/debug/ge_log.h" +#include "opt_info.h" + +namespace ge { +Status GeOptInfo::SetOptInfo() { + std::string soc_ver; + graphStatus ret = GetThreadLocalContext().GetOption(SOC_VERSION, soc_ver); + if (ret != GRAPH_SUCCESS) { + REPORT_CALL_ERROR("E19999", "Get soc version failed."); + GELOGE(FAILED, "[Get][SocVersion]Get soc version failed."); + return FAILED; + } + GELOGD("Soc version:%s.", soc_ver.c_str()); + std::map<std::string, std::string> opt_info; + // the first arg does not work at present. + if (gelc::GetOptInfo(gelc::kOffline, soc_ver, opt_info) != gelc::SUCCESS) { + REPORT_CALL_ERROR("E19999", "Get optional information failed, is_offline:%d, soc version:%s", + gelc::kOffline, soc_ver.c_str()); + GELOGE(FAILED, "[Get][OptInfo]Get optional information failed, is_offline:%d, soc version:%s", + gelc::kOffline, soc_ver.c_str()); + return FAILED; + } + // do nothing if get empty information + if (opt_info.empty()) { + GELOGI("Optional information is empty."); + return SUCCESS; + } + std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); + for (const auto &itr : opt_info) { + graph_options.emplace(itr.first, itr.second); + GELOGI("Get optional information success, key:%s, value:%s.", itr.first.c_str(), itr.second.c_str()); + } + GetThreadLocalContext().SetGraphOption(graph_options); + return SUCCESS; +} +} // namespace ge diff --git a/ge/common/ge_opt_info.h b/ge/common/ge_opt_info.h new file mode 100644 index 00000000..4ec9a59f --- /dev/null +++ b/ge/common/ge_opt_info.h @@ -0,0 +1,31 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef GE_COMMON_GE_OPT_INFO_H_ +#define GE_COMMON_GE_OPT_INFO_H_ + +#include "ge/ge_api_error_codes.h" +#include "register/register_types.h" + +namespace ge { +class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeOptInfo { + public: + GeOptInfo() = default; + static Status SetOptInfo(); +}; +} // namespace ge + +#endif // GE_COMMON_GE_OPT_INFO_H_ diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index bf04ed58..3861e6ac 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -27,6 +27,7 @@ #include "common/math/math_util.h" #include "common/thread_pool.h" #include "common/dump/dump_manager.h" +#include "common/ge_opt_info.h" #include "analyzer/analyzer.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/local_context.h" @@ -949,7 +950,7 @@ Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); if (rt_ret != RT_ERROR_NONE) { - REPORT_CALL_ERROR("E19999", "Call rtCtxCreate faileded, session_id:%lu, graph_id:%u, mode:%d", + REPORT_CALL_ERROR("E19999", "Call rtCtxCreate failed, session_id:%lu, graph_id:%u, mode:%d", session_id, graph_id, mode); GELOGE(FAILED, "[Call][RtCtxCreate] faileded, session_id:%lu, graph_id:%u, mode:%d", session_id, graph_id, mode); return FAILED; @@ -1001,6 +1002,12 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector + c_sec +) + +target_include_directories(opt_feature_stub INTERFACE ${CMAKE_CURRENT_LIST_DIR}/src) diff --git a/tests/depends/opt_info/src/opt_info_stub.cc b/tests/depends/opt_info/src/opt_info_stub.cc new file mode 100644 index 00000000..df518c4b --- /dev/null +++ b/tests/depends/opt_info/src/opt_info_stub.cc @@ -0,0 +1,46 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "opt_info.h" +#include <algorithm> +#include <map> +#include <string> +#include <vector> + +namespace gelc { +namespace { +const std::vector<std::string> kSocVersions = {"Ascend910"}; +} + +void SetAllOptInfo(std::map<std::string, std::string> &opt_infos) { + opt_infos.emplace("opt_module.fe", "all"); + opt_infos.emplace("opt_module.pass", "all"); + opt_infos.emplace("opt_module.op_tune", "all"); + opt_infos.emplace("opt_module.rl_tune", "all"); + opt_infos.emplace("opt_module.aoe", "all"); +} + +Status GetOptInfo(WorkMode mode, const std::string &soc_ver, + std::map<std::string, std::string> &opt_infos) { + if (std::find(kSocVersions.begin(), kSocVersions.end(), soc_ver) == kSocVersions.end()) { + SetAllOptInfo(opt_infos); + return SUCCESS; + } + opt_infos.emplace("opt_module.fe", "all"); + opt_infos.emplace("opt_module.pass", "all"); + opt_infos.emplace("opt_module.op_tune", "all"); + return SUCCESS; +} +} // namespace gelc diff --git a/tests/framework/cmake/graphengine.cmake b/tests/framework/cmake/graphengine.cmake index 81aa00cc..c4380016 100644 --- a/tests/framework/cmake/graphengine.cmake +++ b/tests/framework/cmake/graphengine.cmake @@ -103,6 +103,7 @@ list(APPEND INCLUDE_DIRECTORIES "${GE_CODE_DIR}/third_party/fwkacllib/inc/cce" "${GE_CODE_DIR}/third_party/fwkacllib/inc/ops" "${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain" + "${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info" "${GE_CODE_DIR}/tests/ut/ge" "${GE_CODE_DIR}/tests/ut/common" "${CMAKE_BINARY_DIR}" @@ -117,6 +118,7 @@ list(APPEND STUB_LIBS runtime_stub profiler_stub hccl_stub + opt_feature_stub error_manager_stub ascend_protobuf json diff --git a/tests/st/testcase/test_ge_opt_info.cc b/tests/st/testcase/test_ge_opt_info.cc new file mode 100644 index 00000000..8fc47a9b --- /dev/null +++ b/tests/st/testcase/test_ge_opt_info.cc @@ -0,0 +1,123 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <gtest/gtest.h> +#include "easy_graph/graph/box.h" +#include "easy_graph/graph/node.h" +#include "easy_graph/builder/graph_dsl.h" +#include "easy_graph/builder/box_builder.h" +#include "easy_graph/layout/graph_layout.h" +#include "easy_graph/layout/engines/graph_easy/graph_easy_option.h" +#include "easy_graph/layout/engines/graph_easy/graph_easy_executor.h" +#include "graph/graph.h" +#include "graph/compute_graph.h" +#include "framework/common/types.h" +#include "graph/debug/ge_attr_define.h" +#include "ge_graph_dsl/graph_dsl.h" +#include "ge_graph_dsl/op_desc/op_desc_cfg_box.h" +#define protected public +#define private public +#include "common/ge_opt_info.h" +#undef private +#undef protected + +namespace ge { +class STEST_opt_info : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(STEST_opt_info, get_opt_info_all) { + std::map<std::string, std::string> options = {{ge::SOC_VERSION, "Ascend310"}}; + GetThreadLocalContext().SetGlobalOption(options); + + /// data1 data2 + /// \ / + /// add + // build graph + DEF_GRAPH(g1) { + CHAIN(NODE("data1", DATA)->NODE("add", ADD)); + CHAIN(NODE("data2", DATA)->NODE("add")); + }); + + auto graph = ToGeGraph(g1); + + // new session & add graph + Session session(options); + auto ret = session.AddGraph(1, graph, options); + EXPECT_EQ(ret, SUCCESS); + // build input tensor + std::vector<InputTensorInfo> inputs; + // build_graph through session + ret = session.BuildGraph(1, inputs); + EXPECT_EQ(ret, SUCCESS); + + std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); + auto itr = graph_options.find("opt_module.fe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.pass"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.op_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.rl_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.aoe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); +} + +TEST_F(STEST_opt_info, get_opt_info_success) { + std::map<std::string, std::string> options = {{ge::SOC_VERSION, "Ascend910"}}; + GetThreadLocalContext().SetGlobalOption(options); + + /// data1 data2 + /// \ / + /// add + // build graph + DEF_GRAPH(g1) { + CHAIN(NODE("data1", DATA)->NODE("add", ADD)); + CHAIN(NODE("data2", DATA)->NODE("add")); + }); + + auto graph = ToGeGraph(g1); + + // new session & add graph + Session session(options); + auto ret = session.AddGraph(1, graph, options); + EXPECT_EQ(ret, SUCCESS); + // build input tensor + std::vector<InputTensorInfo> inputs; + // build_graph through session + ret = session.BuildGraph(1, inputs); + EXPECT_EQ(ret, SUCCESS); + + std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); + auto itr = graph_options.find("opt_module.fe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.pass"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.op_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); +} +} // namespace ge diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 8b024820..3ea4d1a7 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -62,6 +62,7 @@ include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc)
include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/cce) include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/ops) include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain) +include_directories(${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info) include_directories(${GE_CODE_DIR}/tests/ut/ge) include_directories(${GE_CODE_DIR}/tests/ut/common) include_directories(${CMAKE_BINARY_DIR}) @@ -172,6 +173,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc" "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" + "${GE_CODE_DIR}/ge/common/ge_opt_info.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" @@ -768,6 +770,7 @@ set(MULTI_PARTS_TEST_FILES "common/dump_op_unittest.cc" "common/dump_exception_unittest.cc" "common/opdebug_register_unittest.cc" + "common/ge_opt_info_unittest.cc" "common/format_transfer_unittest.cc" "common/format_transfer_transpose_unittest.cc" "common/format_transfer_nchw_5d_unittest.cc" @@ -861,6 +864,7 @@ list(APPEND COMMON_SHARED_LIBRARIES mmpa_stub hccl_stub error_manager_stub + opt_feature_stub ascend_protobuf json ) diff --git a/tests/ut/ge/common/ge_opt_info_unittest.cc b/tests/ut/ge/common/ge_opt_info_unittest.cc new file mode 100644 index 00000000..3ac51615 --- /dev/null +++ b/tests/ut/ge/common/ge_opt_info_unittest.cc @@ -0,0 +1,82 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <gtest/gtest.h> +#include <map> + +#define protected public +#define private public +#include "common/ge_opt_info.h" +#include "graph/ge_local_context.h" +#include "external/ge/ge_api_types.h" +#undef private +#undef protected + +namespace ge { +class UTEST_opt_info : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UTEST_opt_info, get_opt_info_success) { + std::map<std::string, std::string> options = {{ge::SOC_VERSION, "Ascend910"}}; + GetThreadLocalContext().SetGlobalOption(options); + auto ret = GeOptInfo::SetOptInfo(); + EXPECT_EQ(ret, ge::SUCCESS); + std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); + auto itr = graph_options.find("opt_module.fe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.pass"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.op_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); +} + +TEST_F(UTEST_opt_info, get_opt_info_all) { + std::map<std::string, std::string> global_options = {{ge::SOC_VERSION, "Ascend310"}}; + GetThreadLocalContext().SetGlobalOption(global_options); + auto ret = GeOptInfo::SetOptInfo(); + EXPECT_EQ(ret, ge::SUCCESS); + std::map<std::string, std::string> graph_options = GetThreadLocalContext().GetAllGraphOptions(); + auto itr = graph_options.find("opt_module.fe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.pass"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.op_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.rl_tune"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); + itr = graph_options.find("opt_module.aoe"); + EXPECT_NE(itr, graph_options.end()); + EXPECT_EQ(itr->second, "all"); +} + +TEST_F(UTEST_opt_info, get_opt_info_failed) { + std::map<std::string, std::string> options; + GetThreadLocalContext().SetGlobalOption(options); + auto ret = GeOptInfo::SetOptInfo(); + EXPECT_EQ(ret, ge::FAILED); +} + +} // namespace ge diff --git a/third_party/fwkacllib/inc/opt_info/opt_info.h b/third_party/fwkacllib/inc/opt_info/opt_info.h new file mode 100644 index 00000000..ea9bb529 --- /dev/null +++ b/third_party/fwkacllib/inc/opt_info/opt_info.h @@ -0,0 +1,34 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <map> +#include <string> + +namespace gelc { +using Status = uint32_t; +using WorkMode = uint32_t; +const Status SUCCESS = 0x0; +const Status FAILED = 0xFFFFFFFF; +const WorkMode kOffline = 0x0; +const WorkMode kInline = 0x01; + +extern "C" { +__attribute__((visibility ("default"))) +Status GetOptInfo(WorkMode mode, const std::string &soc_ver, + std::map<std::string, std::string> &opt_info_map); +} +} // namespace gelc + From 310959e5d97248ff32cf51fefdffb57a6ee2df72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8D=8E?= Date: Tue, 22 Jun 2021 11:53:41 +0800 Subject: [PATCH 41/51] move to ge_compile --- ge/CMakeLists.txt | 8 ++++++++ ge/common/CMakeLists.txt | 7 ------- ge/{common => ge_opt_info}/ge_opt_info.cc | 2 +- ge/{common => ge_opt_info}/ge_opt_info.h | 6 +++--- ge/graph/manager/graph_manager.cc | 2 +- tests/st/testcase/test_ge_opt_info.cc | 2 +- tests/ut/ge/CMakeLists.txt | 17 +++++++++++++---- .../ge/{common => ge_opt_info}/ge_opt_info_unittest.cc | 2 +- 8 files changed, 28 insertions(+), 18 deletions(-) rename ge/{common => ge_opt_info}/ge_opt_info.cc (98%) rename ge/{common => ge_opt_info}/ge_opt_info.h (88%) rename tests/ut/ge/{common => ge_opt_info}/ge_opt_info_unittest.cc (98%) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 81e2d539..543f9745 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -434,6 +434,7 @@ set(TRAIN_SRC_LIST "graph/build/memory/max_block_mem_assigner.cc" "graph/build/memory/var_mem_assign_util.cc" "graph/build/memory/buffer_pool_mem_assigner.cc" + "ge_opt_info/ge_opt_info.cc" ) set(INFER_SRC_LIST @@ -711,6 +712,7 @@ set(INFER_SRC_LIST "graph/build/memory/max_block_mem_assigner.cc" "graph/build/memory/var_mem_assign_util.cc" "graph/build/memory/buffer_pool_mem_assigner.cc" + "ge_opt_info/ge_opt_info.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) @@ -772,11 +774,13 @@ target_include_directories(ge_runner SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external ${GE_CODE_DIR}/../abl/adump/external + ${GE_CODE_DIR}/../abl/licctrl #### blue zone ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_runner PRIVATE @@ -799,6 +803,7 @@ target_link_libraries(ge_runner PRIVATE runtime error_manager ascend_hal_stub + opt_feature -Wl,--as-needed json -lrt @@ -853,11 +858,13 @@ target_include_directories(ge_compiler SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external ${GE_CODE_DIR}/../abl/adump/external + ${GE_CODE_DIR}/../abl/licctrl #### blue zone #### ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain + ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_compiler PRIVATE @@ -877,6 +884,7 @@ target_link_libraries(ge_compiler PRIVATE error_manager slog runtime_compile + opt_feature -Wl,--as-needed json -lrt diff --git a/ge/common/CMakeLists.txt b/ge/common/CMakeLists.txt index 0569c91a..c28bf2a7 100755 --- a/ge/common/CMakeLists.txt +++ b/ge/common/CMakeLists.txt @@ -43,7 +43,6 @@ set(SRC_LIST "op/ge_op_utils.cc" "thread_pool.cc" "ge/tbe_plugin_manager.cc" - "ge_opt_info.cc" ) if (NOT ENABLE_D AND NOT ENABLE_ACL) @@ -87,12 +86,10 @@ target_include_directories(ge_common PRIVATE #### yellow zone #### ${GE_DEPEND_DIR}/inc ${GE_DEPEND_DIR}/inc/cce - ${GE_DEPEND_DIR}/abl/licctrl
#### blue zone #### #${GE_DEPEND_DIR}/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain - ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_common PRIVATE @@ -157,12 +154,10 @@ target_include_directories(ge_common_static PRIVATE #### yellow zone #### ${GE_DEPEND_DIR}/inc ${GE_DEPEND_DIR}/inc/cce - ${GE_DEPEND_DIR}/abl/licctrl #### blue zone #### #${GE_DEPEND_DIR}/include ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain - ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_libraries(ge_common_static PRIVATE @@ -215,7 +210,6 @@ target_include_directories(ge_common PRIVATE ${CMAKE_BINARY_DIR}/proto/graphengine_protos ${GE_CODE_DIR}/third_party/fwkacllib/inc ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain - ${GE_CODE_DIR}/third_party/fwkacllib/inc/opt_info ) target_link_options(ge_common PRIVATE @@ -231,7 +225,6 @@ target_link_libraries(ge_common PRIVATE c_sec error_manager slog - opt_feature static_mmpa -Wl,--as-needed json diff --git a/ge/common/ge_opt_info.cc b/ge/ge_opt_info/ge_opt_info.cc similarity index 98% rename from ge/common/ge_opt_info.cc rename to ge/ge_opt_info/ge_opt_info.cc index c6bac480..8c1b84ab 100644 --- a/ge/common/ge_opt_info.cc +++ b/ge/ge_opt_info/ge_opt_info.cc @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "common/ge_opt_info.h" +#include "ge_opt_info/ge_opt_info.h" #include #include diff --git a/ge/common/ge_opt_info.h b/ge/ge_opt_info/ge_opt_info.h similarity index 88% rename from ge/common/ge_opt_info.h rename to ge/ge_opt_info/ge_opt_info.h index 4ec9a59f..935dff25 100644 --- a/ge/common/ge_opt_info.h +++ b/ge/ge_opt_info/ge_opt_info.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef GE_COMMON_GE_OPT_INFO_H_ -#define GE_COMMON_GE_OPT_INFO_H_ +#ifndef GE_OPT_INFO_GE_OPT_INFO_H_ +#define GE_OPT_INFO_GE_OPT_INFO_H_ #include "ge/ge_api_error_codes.h" #include "register/register_types.h" @@ -28,4 +28,4 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY GeOptInfo { }; } // namespace ge -#endif // GE_COMMON_GE_OPT_INFO_H_ +#endif // GE_OPT_INFO_GE_OPT_INFO_H_ diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 3861e6ac..0a4633ad 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -27,7 +27,7 @@ #include "common/math/math_util.h" #include "common/thread_pool.h" #include "common/dump/dump_manager.h" -#include "common/ge_opt_info.h" +#include "ge_opt_info/ge_opt_info.h" #include "analyzer/analyzer.h" #include "graph/common/ge_call_wrapper.h" #include "graph/common/local_context.h" diff --git a/tests/st/testcase/test_ge_opt_info.cc b/tests/st/testcase/test_ge_opt_info.cc index 8fc47a9b..457473b1 100644 --- a/tests/st/testcase/test_ge_opt_info.cc +++ b/tests/st/testcase/test_ge_opt_info.cc @@ -30,7 +30,7 @@ #include "ge_graph_dsl/op_desc/op_desc_cfg_box.h" #define protected public #define private public -#include "common/ge_opt_info.h" +#include "ge_opt_info/ge_opt_info.h" #undef private #undef protected diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 3ea4d1a7..ea2ac14c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -173,7 +173,6 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/dump/exception_dumper.cc" "${GE_CODE_DIR}/ge/common/dump/opdebug_register.cc" "${GE_CODE_DIR}/ge/common/dump/dump_op.cc" - "${GE_CODE_DIR}/ge/common/ge_opt_info.cc" "${GE_CODE_DIR}/ge/common/helper/om_file_helper.cc" "${GE_CODE_DIR}/ge/model/ge_root_model.cc" "${GE_CODE_DIR}/ge/common/model_parser/model_parser.cc" @@ -348,6 +347,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/common/ge/datatype_util.cc" "${GE_CODE_DIR}/ge/ge_local_engine/engine/host_cpu_engine.cc" "${GE_CODE_DIR}/ge/session/omg.cc" + "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" ) set(COMMON_FORMAT_SRC_FILES @@ -379,7 +379,6 @@ set(GRAPH_OPTIMIZE_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/optimize/mem_rw_conflict_optimize.cc" ) - set(GRAPH_PREPARE_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/preprocess/graph_preprocess.cc" "${GE_CODE_DIR}/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc" @@ -455,6 +454,7 @@ set(GRAPH_EXECUTE_COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/manager/graph_manager.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_context.cc" "${GE_CODE_DIR}/ge/graph/manager/util/rt_context_util.cc" + "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" "${GE_CODE_DIR}/ge/graph/manager/graph_context.h" ) @@ -630,6 +630,10 @@ set(SINGLE_OP_SRC_FILES "${GE_CODE_DIR}/ge/hybrid/hybrid_davinci_model.cc" ) +set(GE_OPT_INFO_SRC_FILES + "${GE_CODE_DIR}/ge/ge_opt_info/ge_opt_info.cc" +) + # test files set(COMMON_TEST_FILES "graph/passes/graph_builder_utils.cc" @@ -767,10 +771,9 @@ set(MULTI_PARTS_TEST_FILES "graph/transop_util_unittest.cc" "common/datatype_transfer_unittest.cc" "common/dump_manager_unittest.cc" - "common/dump_op_unittest.cc" "common/dump_exception_unittest.cc" + "common/dump_op_unittest.cc" "common/opdebug_register_unittest.cc" - "common/ge_opt_info_unittest.cc" "common/format_transfer_unittest.cc" "common/format_transfer_transpose_unittest.cc" "common/format_transfer_nchw_5d_unittest.cc" @@ -815,6 +818,10 @@ set(MULTI_PARTS_TEST_FILES "common/host_cpu_engine_unittest.cc" ) 
+set(GE_OPT_INFO_TEST_FILES + "ge_opt_info/ge_opt_info_unittest.cc" +) + set(GENERATOR_TEST_FILES "generator/ge_generator_unittest.cc" ) @@ -1110,10 +1117,12 @@ target_link_libraries(ut_libge_multiparts_utest # libge_others_utest add_executable(ut_libge_others_utest + ${GE_OPT_INFO_SRC_FILES} ${COMMON_TEST_FILES} ${PASS_TEST_FILES} ${EXECUTE_TEST_FILES} ${OTHERS_TEST_FILES} + ${GE_OPT_INFO_TEST_FILES} ) target_compile_options(ut_libge_others_utest PRIVATE diff --git a/tests/ut/ge/common/ge_opt_info_unittest.cc b/tests/ut/ge/ge_opt_info/ge_opt_info_unittest.cc similarity index 98% rename from tests/ut/ge/common/ge_opt_info_unittest.cc rename to tests/ut/ge/ge_opt_info/ge_opt_info_unittest.cc index 3ac51615..20c123e9 100644 --- a/tests/ut/ge/common/ge_opt_info_unittest.cc +++ b/tests/ut/ge/ge_opt_info/ge_opt_info_unittest.cc @@ -19,7 +19,7 @@ #define protected public #define private public -#include "common/ge_opt_info.h" +#include "ge_opt_info/ge_opt_info.h" #include "graph/ge_local_context.h" #include "external/ge/ge_api_types.h" #undef private From 49c2eadb7c72eca99bd16d966dcf58546eee85d1 Mon Sep 17 00:00:00 2001 From: wangzhengjun Date: Mon, 21 Jun 2021 19:32:53 +0800 Subject: [PATCH 42/51] skip control flow op when replace node with empty tensor --- ge/graph/passes/replace_with_empty_const_pass.cc | 20 ++++++++++ .../replace_with_empty_const_pass_unittest.cc | 45 ++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/ge/graph/passes/replace_with_empty_const_pass.cc b/ge/graph/passes/replace_with_empty_const_pass.cc index 9459c852..3176d1ee 100644 --- a/ge/graph/passes/replace_with_empty_const_pass.cc +++ b/ge/graph/passes/replace_with_empty_const_pass.cc @@ -21,7 +21,23 @@ #include "framework/common/debug/ge_log.h" #include "framework/common/ge_inner_error_codes.h" #include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +namespace { +const std::unordered_set<std::string> kControlFlowOps = { + ge::SWITCH, + ge::REFSWITCH, + ge::MERGE, + ge::REFMERGE, + ge::ENTER, + ge::REFENTER, + ge::NEXTITERATION, + ge::REFNEXTITERATION, + ge::EXIT, + ge::REFEXIT, + ge::LOOPCOND +}; +} namespace ge { Status ReplaceWithEmptyConstPass::Run(NodePtr &node) { GELOGD("ReplaceWithEmptyConstPass in."); @@ -39,6 +55,10 @@ Status ReplaceWithEmptyConstPass::Run(NodePtr &node) { GELOGI("Node %s is const. Ignore current pass.", node->GetName().c_str()); return SUCCESS; } + if (kControlFlowOps.count(NodeUtils::GetNodeType(node)) != 0) { + GELOGI("Node %s is control flow op. Ignore current pass.", node->GetName().c_str()); + return SUCCESS; + } // Node like no op, it has no output if (node->GetOpDesc()->GetAllOutputsDescPtr().empty()) { GELOGI("Node %s has no output desc.
Ignore current pass.", node->GetName().c_str()); diff --git a/tests/ut/ge/graph/passes/replace_with_empty_const_pass_unittest.cc b/tests/ut/ge/graph/passes/replace_with_empty_const_pass_unittest.cc index 6711b0d3..d353498c 100644 --- a/tests/ut/ge/graph/passes/replace_with_empty_const_pass_unittest.cc +++ b/tests/ut/ge/graph/passes/replace_with_empty_const_pass_unittest.cc @@ -57,6 +57,36 @@ ut::GraphBuilder Graph1Builder() { builder.AddDataEdge(cast1, 0, conv2d, 0); return builder; } + +/// data1 const1 +/// \ / +/// add1 +/// | +/// data2 -> switch1 (empty) +/// | +/// conv2d +ut::GraphBuilder Graph2Builder() { + ut::GraphBuilder builder = ut::GraphBuilder("graph2"); + auto data1 = builder.AddNode("data1", "Data", 0, 1); + auto data2 = builder.AddNode("data2", "Data", 0, 1); + auto const1 = builder.AddNode("const1", "Const", 0, 1); + auto add1 = builder.AddNode("add1", "Add", 2, 1); + auto switch1 = builder.AddNode("switch1", "Switch", 2, 1); + auto conv2d = builder.AddNode("conv2d", "Conv2D", 1, 0); + + add1->GetOpDesc()->AddInputDesc(GeTensorDesc(GeShape({1, 1, 8, 8}),FORMAT_NCHW)); + add1->GetOpDesc()->AddInputDesc(GeTensorDesc(GeShape({1, 1, 8, 8}),FORMAT_NCHW)); + add1->GetOpDesc()->AddOutputDesc(GeTensorDesc(GeShape({1, 1, 8, 8}),FORMAT_NCHW)); + GeTensorDesc empty_tensor(GeShape({1, 0, 8, 8}),FORMAT_NCHW); + switch1->GetOpDesc()->UpdateOutputDesc(0, empty_tensor); + + builder.AddDataEdge(data1, 0, add1, 0); + builder.AddDataEdge(const1, 0, add1, 1); + builder.AddDataEdge(add1, 0, switch1, 0); + builder.AddDataEdge(data2, 0, switch1, 1); + builder.AddDataEdge(switch1, 0, conv2d, 0); + return builder; +} } // namespace @@ -85,4 +115,19 @@ TEST_F(UtestReplaceWithEmptyConstPass, replace_whith_empty_const_success) { auto conv2d = graph->FindNode("conv2d"); EXPECT_EQ(conv2d->GetInDataNodes().at(0)->GetType(),"Const"); } + +TEST_F(UtestReplaceWithEmptyConstPass, replace_whith_empty_switch_skip) { + auto builder = Graph2Builder(); + auto graph = builder.GetGraph(); + graph->SetSessionID(0); + ReplaceWithEmptyConstPass replace_with_empty_const_pass; + + EXPECT_EQ(graph->GetDirectNodesSize(), 6); + // run pass on switch1, graph still has 6 nodes + auto switch1 = graph->FindNode("switch1"); + EXPECT_NE(switch1, nullptr); + Status ret = replace_with_empty_const_pass.Run(switch1); + EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(graph->GetDirectNodesSize(), 6); +} } // namespace ge From bba62ec5f3763c598219ec9e786080caa0713b2c Mon Sep 17 00:00:00 2001 From: mindspore-ci-bot <314202276@qq.com> Date: Wed, 23 Jun 2021 12:38:41 +0000 Subject: [PATCH 43/51] !1848 add op_precision_mode option and support op_debug_level = 4 From: @lianghuikang Reviewed-by: Signed-off-by: --- ge/ir_build/ge_ir_build.cc | 69 +++++++++++++++++++-------- ge/offline/main.cc | 36 ++++++++++++-- ge/session/inner_session.cc | 12 +++++ inc/external/ge/ge_api_types.h | 5 ++ tests/ut/ge/graph_ir/ge_ir_build_unittest.cc | 51 ++++++++++++++++++++ tests/ut/ge/session/ge_api_unittest.cc | 2 +- tests/ut/ge/session/inner_session_unittest.cc | 10 ++++ 7 files changed, 158 insertions(+), 27 deletions(-) diff --git a/ge/ir_build/ge_ir_build.cc b/ge/ir_build/ge_ir_build.cc index befffa93..686385d4 100644 --- a/ge/ir_build/ge_ir_build.cc +++ b/ge/ir_build/ge_ir_build.cc @@ -263,6 +263,7 @@ class Impl { omg_context_.user_attr_index_valid = false; }; ~Impl() { (void)generator_.Finalize(); }; + graphStatus CheckBuildModeAndBuildStep(); graphStatus GetSupportedOptions(const std::map<std::string, std::string> &in, std::map<std::string, std::string> &out); graphStatus CheckOptions(const std::map<std::string, std::string>
&options); @@ -451,6 +452,37 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { return GRAPH_SUCCESS; } +graphStatus Impl::CheckBuildModeAndBuildStep() { + std::string build_mode; + auto it = options_.find(BUILD_MODE); + if (it != options_.end() && !(it->second.empty())) { + if (build_mode_options.find(it->second) == build_mode_options.end()) { + REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), + std::vector<std::string>({BUILD_MODE, it->second, "value is unsupported. Please check!"})); + GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildMode]:%s is unsupported. Please check!", it->second.c_str()); + return GRAPH_PARAM_INVALID; + } + build_mode = it->second; + } + it = options_.find(BUILD_STEP); + if (it != options_.end() && !(it->second.empty())) { + if (build_step_options.find(it->second) == build_step_options.end()) { + REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), + std::vector<std::string>({BUILD_STEP, it->second, "value is unsupported. Please check!"})); + GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildStep]:%s is unsupported. Please check!", it->second.c_str()); + return GRAPH_PARAM_INVALID; + } + } else { + if (build_mode == BUILD_MODE_TUNING) { + REPORT_INPUT_ERROR("E10001", std::vector<std::string>({"parameter", "value", "reason"}), + std::vector<std::string>({BUILD_MODE, build_mode, "tuning must specify build step. Please check!"})); + GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildMode] tuning must specify build step. Please check!"); + return GRAPH_PARAM_INVALID; + } + } + return GRAPH_SUCCESS; +} + graphStatus Impl::GetSupportedOptions(const std::map<std::string, std::string> &in, std::map<std::string, std::string> &out) { for (auto &ele : in) { @@ -475,29 +507,12 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options } // Check options build_mode and build_step. - std::string build_mode; - auto it = options_.find(BUILD_MODE); - if (it != options_.end() && !(it->second.empty())) { - if (build_mode_options.find(it->second) == build_mode_options.end()) { - GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildMode]:%s is unsupported. Please check!", it->second.c_str()); - return GRAPH_PARAM_INVALID; - } - build_mode = it->second; - } - it = options_.find(BUILD_STEP); - if (it != options_.end() && !(it->second.empty())) { - if (build_step_options.find(it->second) == build_step_options.end()) { - GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildStep]:%s is unsupported. Please check!", it->second.c_str()); - return GRAPH_PARAM_INVALID; - } - } else { - if (build_mode == BUILD_MODE_TUNING) { - GELOGE(GRAPH_PARAM_INVALID, "[Check][BuildMode] tuning must specify build step.
Please check!"); - return GRAPH_PARAM_INVALID; - } + ret = CheckBuildModeAndBuildStep(); + if (ret != GRAPH_SUCCESS) { + return ret; } // Check option EXEC_DISABLE_REUSED_MEMORY - it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); + auto it = options_.find(ge::ir_option::EXEC_DISABLE_REUSED_MEMORY); if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { return GRAPH_PARAM_INVALID; } @@ -505,6 +520,18 @@ graphStatus Impl::CheckOptions(const std::map &options if (ge::CheckModifyMixlistParamValid(options_) != GRAPH_SUCCESS) { return GRAPH_PARAM_INVALID; } + // Check option OP_PRECISION_MODE + it = options_.find(ge::ir_option::OP_PRECISION_MODE); + if (it != options_.end() && !it->second.empty() && !ge::CheckInputPathValid(it->second)) { + REPORT_INPUT_ERROR("E10001", std::vector({"parameter", "value", "reason"}), + std::vector({ge::ir_option::OP_PRECISION_MODE, it->second, "path is not found"})); + GELOGE(GRAPH_PARAM_INVALID, "[Check][OP_PRECISION_MODE] %s not found", it->second.c_str()); + return GRAPH_PARAM_INVALID; + } + if (it != options_.end()) { + GELOGI("Option set successfully, option_key=%s, option_value=%s", + ge::ir_option::OP_PRECISION_MODE, it->second.c_str()); + } // Check Input Format if (options_.find(kInputFormat) != options_.end()) { return CheckInputFormat(options_[kInputFormat]); diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 14db1ded..7043fbeb 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -106,10 +106,14 @@ DEFINE_string(out_nodes, "", "Optional; output nodes designated by users." "Format: \"node_name1:0;node_name1:1;node_name2:0\""); +DEFINE_string(op_precision_mode, "", "Optional; operator precision mode configuration file path"); + DEFINE_string(precision_mode, "force_fp16", "Optional; precision mode." "Support force_fp16, force_fp32, allow_mix_precision, allow_fp32_to_fp16, must_keep_origin_dtype."); +DEFINE_string(modify_mixlist, "", "Optional; operator mixed precision configuration file path"); + DEFINE_string(keep_dtype, "", "Optional; config file to specify the precision used by the operator during compilation."); @@ -192,8 +196,11 @@ DEFINE_string(log, "null", "Optional; generate atc log. Support debug, info, war DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0."); -DEFINE_int32(op_debug_level, 0, "Optional; configure debug level of compiler. 0(default): close debug;" - "1: open TBE compiler, export ccec file and TBE instruction mapping file; 2: open ccec compiler"); +DEFINE_int32(op_debug_level, 0, "Optional; configure debug level of compiler. 
0(default): close debug; "
+                 "1: open TBE compiler, export ccec file and TBE instruction mapping file; 2: open ccec compiler; "
+                 "3: disable debug, and keep generating kernel file (.o and .json); 4: disable debug, "
+                 "keep generating kernel file (.o and .json) and generate the operator CCE file (.cce) "
+                 "and the UB fusion computing description file (.json)");
 DEFINE_string(enable_scope_fusion_passes, "", "Optional; validate the non-general scope fusion pass,"
               "multiple names can be set and separated by ','.");
 DEFINE_string(debug_dir, "", "Optional; the path to save the intermediate files of operator compilation");
@@ -210,8 +217,6 @@ DEFINE_string(display_model_info, "0", "Optional; display model info");
 
 DEFINE_string(device_id, "0", "Optional; user device id");
 
-DEFINE_string(modify_mixlist, "", "Optional; operator mixed precision configuration file path");
-
 class GFlagUtils {
  public:
   /**
@@ -298,8 +303,10 @@ class GFlagUtils {
       "\"l1_optimize\", \"off_optimize\"\n"
       "  --mdl_bank_path Set the path of the custom repository generated after model tuning.\n"
       "\n[Operator Tuning]\n"
+      "  --op_precision_mode Set the path of operator precision mode configuration file (.ini)\n"
       "  --precision_mode precision mode, support force_fp16(default), force_fp32, allow_mix_precision, "
       "allow_fp32_to_fp16, must_keep_origin_dtype.\n"
+      "  --modify_mixlist Set the path of operator mixed precision configuration file.\n"
       "  --keep_dtype Retains the precision of certain operators in inference "
       "scenarios by using a configuration file.\n"
      "  --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n"
@@ -315,7 +322,8 @@ class GFlagUtils {
       "  2: Enable TBE pipe_all, generate the operator CCE file and Python-CCE mapping file "
       "(.json), and enable the CCE compiler -O0-g.\n"
       "  3: Disable debug, and keep generating kernel file (.o and .json)\n"
-      "  --modify_mixlist Set the path of operator mixed precision configuration file.\n"
+      "  4: Disable debug, keep generating kernel file (.o and .json) and generate the "
+      "operator CCE file (.cce) and the UB fusion computing description file (.json)"
       "\n[Debug]\n"
       "  --save_original_model Control whether to output original model. E.g.: true: output original model\n"
       "  --log Generate log with level. 
Support debug, info, warning, error, null\n" @@ -365,6 +373,14 @@ class GFlagUtils { FLAGS_op_select_implmode) != ge::SUCCESS, ret = ge::FAILED, "[Check][ImplMode]check optypelist_for_implmode and op_select_implmode failed!"); + if (!FLAGS_op_precision_mode.empty() && !ge::CheckInputPathValid(FLAGS_op_precision_mode, "--op_precision_mode")) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {"op_precision_mode", FLAGS_op_precision_mode.c_str(), + "path is not found"}); + GELOGE(ge::FAILED, "[Check][op_precision_mode] %s not found", FLAGS_op_precision_mode.c_str()); + ret = ge::FAILED; + } + if (ge::CheckModifyMixlistParamValid(FLAGS_precision_mode, FLAGS_modify_mixlist) != ge::SUCCESS) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, {"modify_mixlist", FLAGS_modify_mixlist.c_str(), @@ -1049,6 +1065,7 @@ static void SetEnvForSingleOp(std::map &options) { options.emplace(ge::RUN_FLAG, flag_off); options.emplace(ge::OPTION_GRAPH_RUN_MODE, flag_off); options.emplace(ge::SINGLE_OP_FLAG, flag_on); + options.emplace(ge::OP_PRECISION_MODE, FLAGS_op_precision_mode); options.emplace(ge::PRECISION_MODE, FLAGS_precision_mode); options.emplace(ge::SOC_VERSION, FLAGS_soc_version); options.emplace(ge::CORE_TYPE, FLAGS_core_type); @@ -1076,6 +1093,14 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { ge::CheckImplmodeParamValid(FLAGS_optypelist_for_implmode, FLAGS_op_select_implmode) != ge::SUCCESS, return ge::FAILED, "[Check][ImplmodeParam] fail for input optypelist_for_implmode and op_select_implmode."); + if (!FLAGS_op_precision_mode.empty() && !ge::CheckInputPathValid(FLAGS_op_precision_mode, "--op_precision_mode")) { + ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, + {"op_precision_mode", FLAGS_op_precision_mode.c_str(), + "path is not found"}); + GELOGE(ge::FAILED, "[Check][op_precision_mode] %s not found", FLAGS_op_precision_mode.c_str()); + return ge::FAILED; + } + if (ge::CheckModifyMixlistParamValid(FLAGS_precision_mode, FLAGS_modify_mixlist) != ge::SUCCESS) { ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "value", "reason"}, {"modify_mixlist", FLAGS_modify_mixlist.c_str(), @@ -1159,6 +1184,7 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::CALIBRATION_CONF_FILE), FLAGS_cal_conf)); options.insert(std::pair(string(ge::OUTPUT_NODE_NAME), FLAGS_out_nodes)); options.insert(std::pair(string(ge::INSERT_OP_FILE), FLAGS_insert_op_conf)); + options.insert(std::pair(string(ge::OP_PRECISION_MODE), FLAGS_op_precision_mode)); options.insert(std::pair(string(ge::PRECISION_MODE), FLAGS_precision_mode)); options.insert(std::pair(string(ge::TUNE_DEVICE_IDS), FLAGS_device_id)); diff --git a/ge/session/inner_session.cc b/ge/session/inner_session.cc index 8248eecf..f82bcc80 100755 --- a/ge/session/inner_session.cc +++ b/ge/session/inner_session.cc @@ -82,6 +82,18 @@ Status InnerSession::Initialize() { return ret; } + //Check option OP_PRECISION_MODE + auto iter = all_options.find(ge::OP_PRECISION_MODE); + if (iter != all_options.end() && !iter->second.empty() && !ge::CheckInputPathValid(iter->second)) { + REPORT_INPUT_ERROR("E10001", std::vector({"parameter", "value", "reason"}), + std::vector({ge::OP_PRECISION_MODE, iter->second, "path is not found"})); + GELOGE(PARAM_INVALID, "[Check][OP_PRECISION_MODE] %s not found", iter->second.c_str()); + return FAILED; + } + if (iter != all_options.end()) { + GELOGI("Option 
set successfully, option_key=%s, option_value=%s", + ge::OP_PRECISION_MODE.c_str(), iter->second.c_str()); + } // Check option modify_mixlist if (ge::CheckModifyMixlistParamValid(all_options) != ge::SUCCESS) { return FAILED; diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index fbd6c020..6f5bbfbf 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -113,6 +113,7 @@ const char *const INPUT_FP16_NODES = "ge.INPUT_NODES_SET_FP16"; const char *const OP_DEBUG_LEVEL = "ge.opDebugLevel"; const char *const PERFORMANCE_MODE = "ge.performance_mode"; const char *const MODIFY_MIXLIST = "ge.exec.modify_mixlist"; +const char *const OP_PRECISION_MODE = "ge.exec.op_precision_mode"; } // namespace configure_option // Configure stream num by Session constructor options param, // its value should be int32_t type, default value is "1" @@ -326,6 +327,8 @@ const std::string PERFORMANCE_MODE = "ge.performance_mode"; const std::string MODIFY_MIXLIST = "ge.exec.modify_mixlist"; +const std::string OP_PRECISION_MODE = "ge.exec.op_precision_mode"; + // Graph run mode enum GraphRunMode { PREDICTION = 0, TRAIN }; @@ -405,6 +408,7 @@ static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); static const char *const PERFORMANCE_MODE = ge::PERFORMANCE_MODE.c_str(); static const char *const MODIFY_MIXLIST = ge::MODIFY_MIXLIST.c_str(); +static const char *const OP_PRECISION_MODE = ge::OP_PRECISION_MODE.c_str(); // for interface: aclgrphBuildModel #ifdef __GNUC__ @@ -416,6 +420,7 @@ const std::set ir_builder_suppported_options = {INPUT_FORMAT, DYNAMIC_IMAGE_SIZE, DYNAMIC_DIMS, INSERT_OP_FILE, + OP_PRECISION_MODE, PRECISION_MODE, TUNE_DEVICE_IDS, EXEC_DISABLE_REUSED_MEMORY, diff --git a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc index 047c9e1d..823981d3 100644 --- a/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc +++ b/tests/ut/ge/graph_ir/ge_ir_build_unittest.cc @@ -378,4 +378,55 @@ TEST(UtestIrCommon, check_dynamic_imagesize_input_shape_valid_format_empty) { std::string dynamic_image_size = ""; bool ret = CheckDynamicImagesizeInputShapeValid(shape_map, "123", dynamic_image_size); EXPECT_EQ(ret, false); +} + +TEST(UtestIrBuild, check_op_precision_mode_param) { + Graph graph = BuildIrGraph1(); + const std::map build_options = { + {"ge.exec.op_precision_mode", "./op_precision_mode.ini"} + }; + ModelBufferData model; + + auto ret = aclgrphBuildModel(graph, build_options, model); + EXPECT_EQ(ret, GRAPH_PARAM_INVALID); +} + +TEST(UtestIrBuild, check_build_model_and_build_step) { + Graph graph_1 = BuildIrGraph1(); + const std::map build_options_1 = { + {"ge.buildMode", "xxx"} + }; + ModelBufferData model_1; + auto ret_1 = aclgrphBuildModel(graph_1, build_options_1, model_1); + EXPECT_NE(ret_1, GRAPH_SUCCESS); + + Graph graph_2 = BuildIrGraph1(); + const std::map build_options_2 = { + {"ge.buildStep", "xxx"} + }; + ModelBufferData model_2; + auto ret_2 = aclgrphBuildModel(graph_2, build_options_2, model_2); + EXPECT_NE(ret_2, GRAPH_SUCCESS); + + Graph graph_3 = BuildIrGraph1(); + const std::map build_options_3 = { + {"ge.buildMode", "tuning"} + }; + ModelBufferData model_3; + auto ret_3 = aclgrphBuildModel(graph_3, build_options_3, model_3); + EXPECT_NE(ret_3, GRAPH_SUCCESS); +} + +TEST(UtestIrBuild, atc_cfg_optype_param) { + ComputeGraphPtr graph = BuildComputeGraph1(); + FILE *fp = fopen("./keep.txt", "w+"); + if (fp) { + fprintf(fp, 
"Test\n"); + fprintf(fp, "OpType::Mul\n"); + fprintf(fp, "Optype::Sub\n"); + fclose(fp); + } + auto ret = KeepDtypeFunc(graph, "./keep.txt"); + (void)remove("./keep.txt"); + EXPECT_EQ(ret, GRAPH_PARAM_INVALID); } \ No newline at end of file diff --git a/tests/ut/ge/session/ge_api_unittest.cc b/tests/ut/ge/session/ge_api_unittest.cc index 2cabc4a3..9a7058f3 100644 --- a/tests/ut/ge/session/ge_api_unittest.cc +++ b/tests/ut/ge/session/ge_api_unittest.cc @@ -64,7 +64,7 @@ TEST_F(UtestGeApi, build_graph_success) { ASSERT_NE(ret, SUCCESS); } -TEST_F(UtestGeApi, ge_initialize) { +TEST_F(UtestGeApi, ge_initialize_modify_mixlist) { std::map options = { {ge::MODIFY_MIXLIST, "/mixlist.json"} }; diff --git a/tests/ut/ge/session/inner_session_unittest.cc b/tests/ut/ge/session/inner_session_unittest.cc index ecad56d6..0d20f06a 100644 --- a/tests/ut/ge/session/inner_session_unittest.cc +++ b/tests/ut/ge/session/inner_session_unittest.cc @@ -53,4 +53,14 @@ TEST_F(Utest_Inner_session, initialize) { auto ret = inner_session.Initialize(); EXPECT_NE(ret, ge::SUCCESS); } + +TEST_F(Utest_Inner_session, check_op_precision_mode) { + std::map options = { + {ge::OP_PRECISION_MODE, "./op_precision_mode.ini"} + }; + uint64_t session_id = 1; + InnerSession inner_session(session_id, options); + auto ret = inner_session.Initialize(); + EXPECT_NE(ret, ge::SUCCESS); +} } // namespace ge From d3bda362d969be6e1509462883743018dfc5c065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8D=8E?= Date: Thu, 24 Jun 2021 15:50:21 +0800 Subject: [PATCH 44/51] fix opt info --- ge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 543f9745..9490a59b 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -774,7 +774,7 @@ target_include_directories(ge_runner SYSTEM PRIVATE ${GE_CODE_DIR}/../inc/cce ${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external ${GE_CODE_DIR}/../abl/adump/external - ${GE_CODE_DIR}/../abl/licctrll + ${GE_CODE_DIR}/../abl/licctrl #### blue zone ${ASCEND_DIR}/driver/include ${ASCEND_DIR}/fwkacllib/include From a431199716a5e13a3b33aae0bc99b249ab440ae7 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 24 Jun 2021 16:02:18 +0800 Subject: [PATCH 45/51] Fix mem leak and recursive depth protection. 
---
 ge/common/ge/tbe_plugin_manager.cc      | 12 ++++++++++--
 ge/common/ge/tbe_plugin_manager.h       |  3 ++-
 ge/single_op/single_op_model.cc         | 15 +++++----------
 ge/single_op/single_op_model.h          |  3 +--
 ge/single_op/task/aicpu_task_builder.cc |  8 +++-----
 ge/single_op/task/aicpu_task_builder.h  |  4 ++--
 ge/single_op/task/op_task.cc            |  4 +---
 ge/single_op/task/op_task.h             |  1 -
 8 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc
index 94ba8a9a..70c1ab94 100755
--- a/ge/common/ge/tbe_plugin_manager.cc
+++ b/ge/common/ge/tbe_plugin_manager.cc
@@ -104,7 +104,15 @@ void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caff
   }
 }
 
-void TBEPluginManager::FindParserSo(const string &path, vector &file_list, string &caffe_parser_path) {
+void TBEPluginManager::FindParserSo(const string &path, vector &file_list,
+                                    string &caffe_parser_path, uint32_t recursive_depth) {
+  static const uint32_t max_recursive_depth = 20; // For recursive depth protection
+
+  if (recursive_depth >= max_recursive_depth) {
+    GELOGW("Recursive depth has reached %u, please check input!", recursive_depth);
+    return;
+  }
+
   // Path, change to absolute path
   string real_path = RealPath(path.c_str());
   // Plugin path does not exist
@@ -138,7 +146,7 @@ void TBEPluginManager::FindParserSo(const string &path, vector &file_lis
       ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff,
                         aicpu_host_so_suff);
     } else {
-      FindParserSo(full_name, file_list, caffe_parser_path);
+      FindParserSo(full_name, file_list, caffe_parser_path, recursive_depth + 1);
     }
   }
   mmScandirFree(entries, ret);
diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h
index 4bd8c6e3..eada3e64 100755
--- a/ge/common/ge/tbe_plugin_manager.h
+++ b/ge/common/ge/tbe_plugin_manager.h
@@ -57,7 +57,8 @@ class TBEPluginManager {
   static void ProcessSoFullName(vector &file_list, string &caffe_parser_path, string &full_name,
                                 const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                 const string &aicpu_host_so_suff);
-  static void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path);
+  static void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path,
+                           uint32_t recursive_depth = 0);
   static void GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path);
   static void GetCustomOpPath(std::string &customop_path);
   void LoadCustomOpLib();
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 90a6362c..4491bcc0 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -376,11 +376,10 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
   } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
     GELOGD("Building AICPU_TF task");
     AiCpuTask *aicpu_task = nullptr;
-    bool depend_compute_flag = false;
     uint64_t singleop_kernel_id = aicpu_kernel_id++;
     GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
     GE_CHK_STATUS_RET_NOLOG(
-        BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id));
+        BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, singleop_kernel_id));
     aicpu_task->SetModelArgs(model_name_, model_id_);
     ParseArgTable(aicpu_task, single_op);
     single_op.tasks_.emplace_back(aicpu_task);
@@ -457,8 +456,7 @@ Status SingleOpModel::BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask *
   return SUCCESS;
 }
 
-Status 
SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) { +Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id) { auto iter = op_list_.find(kernel_def.op_index()); if (iter == op_list_.end()) { GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, @@ -476,12 +474,11 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC return ACL_ERROR_GE_MEMORY_ALLOCATION; } auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); - auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id); + auto ret = builder.BuildTask(*aicpu_task, model_params_, kernel_id); if (ret != SUCCESS) { GELOGE(ret, "[Build][Task] failed, kernel_id:%lu.", kernel_id); return ret; } - depend_compute_flag = (aicpu_task->GetUnknownType() == DEPEND_COMPUTE); *task = aicpu_task.release(); return SUCCESS; @@ -628,12 +625,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, } GELOGD("Building AICPU_TF task"); AiCpuTask *aicpu_task = nullptr; - bool depend_compute_flag = false; uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); - GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, - depend_compute_flag, dynamic_singleop_kernel_id)); - if (depend_compute_flag) { + GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, dynamic_singleop_kernel_id)); + if (aicpu_task->GetUnknownType() == DEPEND_COMPUTE) { if (i >= tasks.size() - 1) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Task]The copy task of the fourth operator was not found."); REPORT_INNER_ERROR("E19999", "The copy task of the fourth operator was not found."); diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index 529a442d..f3e52cc3 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -69,8 +69,7 @@ class SingleOpModel { Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); - Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, - bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); + Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, DynamicSingleOp &single_op); diff --git a/ge/single_op/task/aicpu_task_builder.cc b/ge/single_op/task/aicpu_task_builder.cc index 805b1306..1b945280 100755 --- a/ge/single_op/task/aicpu_task_builder.cc +++ b/ge/single_op/task/aicpu_task_builder.cc @@ -63,7 +63,7 @@ namespace ge { return SUCCESS; } - Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag) { + Status AiCpuTaskBuilder::InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m) { if (kernel_def_.args_size() > sizeof(STR_FWK_OP_KERNEL)) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[Check][Size]sizeof STR_FWK_OP_KERNEL is: %lu, but args_size is: %d", sizeof(STR_FWK_OP_KERNEL), kernel_def_.args_size()); @@ -83,9 +83,8 @@ namespace 
ge { return SUCCESS; } - Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, - bool dynamic_flag, uint64_t kernel_id) { - GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param, dynamic_flag)); + Status AiCpuTaskBuilder::BuildTask(ge::AiCpuTask &task, const SingleOpModelParam ¶m, uint64_t kernel_id) { + GE_CHK_STATUS_RET_NOLOG(InitWorkspaceAndIO(task, param)); STR_FWK_OP_KERNEL fwk_op_kernel = {0}; auto ret = SetFmkOpKernel(task.io_addr_, task.workspace_addr_, fwk_op_kernel); @@ -124,7 +123,6 @@ namespace ge { task.arg_size_ = sizeof(STR_FWK_OP_KERNEL); task.op_type_ = op_desc_->GetName(); task.task_info_ = kernel_def_.task_info(); - task.dynamic_flag_ = dynamic_flag; task.kernel_id_ = kernel_id; auto debug_info = BuildTaskUtils::GetTaskInfo(op_desc_); diff --git a/ge/single_op/task/aicpu_task_builder.h b/ge/single_op/task/aicpu_task_builder.h index fe9c9bc2..eca91254 100755 --- a/ge/single_op/task/aicpu_task_builder.h +++ b/ge/single_op/task/aicpu_task_builder.h @@ -29,12 +29,12 @@ namespace ge { AiCpuTaskBuilder(const OpDescPtr &op_desc, const domi::KernelExDef &kernel_def); ~AiCpuTaskBuilder() = default; - Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag, uint64_t kernel_id); + Status BuildTask(AiCpuTask &task, const SingleOpModelParam ¶m, uint64_t kernel_id); private: static Status SetKernelArgs(void **args, STR_FWK_OP_KERNEL &kernel); Status SetFmkOpKernel(void *io_addr, void *ws_addr, STR_FWK_OP_KERNEL &kernel); - Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m, bool dynamic_flag); + Status InitWorkspaceAndIO(AiCpuTask &task, const SingleOpModelParam ¶m); const OpDescPtr op_desc_; const domi::KernelExDef &kernel_def_; diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index e48677f8..c4bbbd90 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -623,9 +623,7 @@ Status AiCpuBaseTask::UpdateIoAddr(const vector &inputs, const vecto AiCpuTask::~AiCpuTask() { FreeHbm(args_); FreeHbm(io_addr_); - if (dynamic_flag_) { - FreeHbm(workspace_addr_); - } + FreeHbm(workspace_addr_); FreeHbm(copy_workspace_buf_); FreeHbm(copy_ioaddr_dev_); FreeHbm(copy_input_release_flag_dev_); diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index ed6cf40f..ce569ce0 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -192,7 +192,6 @@ class AiCpuTask : public AiCpuBaseTask { // host addr std::vector io_addr_host_; - bool dynamic_flag_ = false; // for copy task void *copy_task_args_buf_ = nullptr; void *copy_workspace_buf_ = nullptr; From a562b4b6be76a62f9aa4a4801b039578b4bfc5e8 Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 24 Jun 2021 16:06:51 +0800 Subject: [PATCH 46/51] Fix mem leak and recursive depth protection. 
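This follow-up removes the UpdateExecuteMode calls from the hybrid AICPU node task and from the single-op base task, so the execute mode already carried in the kernel ext-info blob is left as-is instead of being rewritten on every init. Together with the previous patch, which dropped dynamic_flag_ and made ~AiCpuTask call FreeHbm(workspace_addr_) unconditionally, ownership of the workspace buffer no longer depends on a flag that the allocation and release paths both have to keep in sync. A hedged sketch of that ownership shape (ScopedDeviceBuffer and the injected deleter are illustrative, not GE code; the real code frees HBM via FreeHbm):

    #include <functional>
    #include <utility>

    // A buffer that is always released in the destructor; no dynamic_flag_-style
    // guard can drift out of sync with the allocation path.
    class ScopedDeviceBuffer {
     public:
      ScopedDeviceBuffer(void *ptr, std::function<void(void *)> deleter)
          : ptr_(ptr), deleter_(std::move(deleter)) {}
      ~ScopedDeviceBuffer() {
        if (ptr_ != nullptr) {
          deleter_(ptr_);  // unconditional release, mirrors FreeHbm(workspace_addr_)
        }
      }
      ScopedDeviceBuffer(const ScopedDeviceBuffer &) = delete;
      ScopedDeviceBuffer &operator=(const ScopedDeviceBuffer &) = delete;
      void *get() const { return ptr_; }

     private:
      void *ptr_;
      std::function<void(void *)> deleter_;
    };

With ownership expressed this way, an allocation path cannot forget to set the flag that the release path checks, which is the class of leak the commit title refers to.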
--- ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc | 4 ---- ge/single_op/task/op_task.cc | 1 - 2 files changed, 5 deletions(-) diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc index c2ebf654..c83a76d1 100755 --- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc +++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc @@ -64,10 +64,6 @@ Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_ GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), "[Update][SessionInfoSessionId] failed, session_id:%ld.", session_id); - bool execute_mode = !aicpu_ext_handle_.IsNeedRefreshIOAddr() && !node_item_->is_dynamic; - GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateExecuteMode(execute_mode), - "[Update][ExecuteMode] failed, node:%s.", node_name_.c_str()); - // copy task args buf GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), "[Invoke][AllocTensorBuffer]Node[%s] alloc kernel_ext_info buf failed, size=%zu", diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index c4bbbd90..dbe2c482 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -451,7 +451,6 @@ Status AiCpuBaseTask::SetExtInfoAndType(const std::string &kernel_ext_info, uint GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateSessionInfo(ULLONG_MAX, kernel_id, false), "[Update][SessionInfo] failed."); - GE_CHK_STATUS_RET(aicpu_ext_handle_->UpdateExecuteMode(true), "[Update][ExecuteMode] failed."); GE_CHK_RT_RET(rtMalloc(&ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_, aicpu_ext_handle_->GetExtInfoLen(), From cd9869c99d06ff6bf07f6b91fd5b100a04d92ceb Mon Sep 17 00:00:00 2001 From: zhaozhixuan Date: Thu, 24 Jun 2021 22:14:40 +0800 Subject: [PATCH 47/51] Fix bug. 
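The fix here tightens the depth guard added two patches back: the counter in the definition becomes a plain int, the limit is renamed to kMaxRecursiveDepth to match the usual constant-naming convention, and the GELOGW format specifier moves from %u to %d so it stays matched to the new type. Matched specifier/type pairs are exactly what -Wformat verifies; a tiny standalone illustration (not GE code):

    #include <cstdio>

    int main() {
      int recursive_depth = 20;
      // %d matches int; printf("%u", recursive_depth) would draw a -Wformat
      // warning and would print a huge unsigned value for a negative depth.
      std::printf("Recursive depth has reached %d, please check input!\n", recursive_depth);
      return 0;
    }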
---
 ge/common/ge/tbe_plugin_manager.cc | 8 ++++----
 ge/common/ge/tbe_plugin_manager.h  | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ge/common/ge/tbe_plugin_manager.cc b/ge/common/ge/tbe_plugin_manager.cc
index 70c1ab94..9dda4558 100755
--- a/ge/common/ge/tbe_plugin_manager.cc
+++ b/ge/common/ge/tbe_plugin_manager.cc
@@ -105,11 +105,11 @@ void TBEPluginManager::ProcessSoFullName(vector &file_list, string &caff
 }
 
 void TBEPluginManager::FindParserSo(const string &path, vector &file_list,
-                                    string &caffe_parser_path, uint32_t recursive_depth) {
-  static const uint32_t max_recursive_depth = 20; // For recursive depth protection
+                                    string &caffe_parser_path, int recursive_depth) {
+  static const int kMaxRecursiveDepth = 20; // For recursive depth protection
 
-  if (recursive_depth >= max_recursive_depth) {
-    GELOGW("Recursive depth has reached %u, please check input!", recursive_depth);
+  if (recursive_depth >= kMaxRecursiveDepth) {
+    GELOGW("Recursive depth has reached %d, please check input!", recursive_depth);
     return;
   }
 
diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h
index eada3e64..087ddd83 100755
--- a/ge/common/ge/tbe_plugin_manager.h
+++ b/ge/common/ge/tbe_plugin_manager.h
@@ -58,7 +58,7 @@ class TBEPluginManager {
                                 const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                 const string &aicpu_host_so_suff);
   static void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path,
-                           uint32_t recursive_depth = 0);
+                           int32_t recursive_depth = 0);
   static void GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path);
   static void GetCustomOpPath(std::string &customop_path);
   void LoadCustomOpLib();

From 2a0a6eaf2cacdc3611694abd4ae81f3c734f80f9 Mon Sep 17 00:00:00 2001
From: zhaozhixuan
Date: Thu, 24 Jun 2021 22:17:54 +0800
Subject: [PATCH 48/51] Fix bug.
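The previous change left the header declaring recursive_depth as int32_t while the definition in the .cc file uses int. int32_t is normally a typedef for int, so this compiled, but declaration and definition should spell the parameter the same way; this patch aligns the header to plain int. The pattern, with a hypothetical function name:

    // Declaration and definition use one spelling of the type; the default
    // argument appears on the declaration only. Mixing int32_t and int happens
    // to work where int32_t is a typedef for int, but keeping one spelling
    // removes the doubt.
    void FindSo(int recursive_depth = 0);

    void FindSo(int recursive_depth) {
      (void)recursive_depth;  // placeholder body for the sketch
    }

    int main() {
      FindSo();  // uses the default depth of 0
      return 0;
    }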
---
 ge/common/ge/tbe_plugin_manager.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ge/common/ge/tbe_plugin_manager.h b/ge/common/ge/tbe_plugin_manager.h
index 087ddd83..7350c672 100755
--- a/ge/common/ge/tbe_plugin_manager.h
+++ b/ge/common/ge/tbe_plugin_manager.h
@@ -58,7 +58,7 @@ class TBEPluginManager {
                                 const string &caffe_parser_so_suff, const string &aicpu_so_suff,
                                 const string &aicpu_host_so_suff);
   static void FindParserSo(const string &path, vector &file_list, string &caffe_parser_path,
-                           int32_t recursive_depth = 0);
+                           int recursive_depth = 0);
   static void GetPluginSoFileList(const string &path, vector &file_list, string &caffe_parser_path);
   static void GetCustomOpPath(std::string &customop_path);
   void LoadCustomOpLib();

From 6fd93375053dfa4c9e4de1ecd37b20f62e987cc5 Mon Sep 17 00:00:00 2001
From: lichun
Date: Fri, 25 Jun 2021 13:47:18 +0800
Subject: [PATCH 49/51] add atc_params: check_report for ConvertModelToJson

---
 ge/offline/main.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ge/offline/main.cc b/ge/offline/main.cc
index 14db1ded..9ad300fc 100755
--- a/ge/offline/main.cc
+++ b/ge/offline/main.cc
@@ -847,6 +847,7 @@ domi::Status GenerateInfershapeJson() {
   ge::Graph graph;
   std::map atc_params;
   atc_params.insert(std::pair("input_format", FLAGS_input_format));
+  atc_params.insert(std::pair("check_report", FLAGS_check_report));
   ret = ParseGraph(graph, atc_params, FLAGS_om.c_str(), FLAGS_weight.c_str(),
                    (domi::FrameworkType) FLAGS_framework, "", FLAGS_target.c_str(),
                    (ge::RunMode) FLAGS_mode, false);
   if (ret != ge::SUCCESS) {

From e2cad9c2ec241d2161cdaa175efe5c73dbc59080 Mon Sep 17 00:00:00 2001
From: chuxing
Date: Sat, 19 Jun 2021 16:41:25 +0800
Subject: [PATCH 50/51] fixed ad3e707 from
 https://gitee.com/mindspore/graphengine/pulls/1821

---
 ge/hybrid/model/hybrid_model_builder.cc             |  22 +++++
 ge/hybrid/model/hybrid_model_builder.h              |   1 +
 ge/hybrid/node_executor/node_executor.cc            |  80 ++++++++--------
 ge/hybrid/node_executor/node_executor.h             |   9 +-
 tests/ut/ge/CMakeLists.txt                          |   2 +
 .../hybrid/model/hybrid_model_builder_unittest.cc   |  27 ++++++
 .../hybrid/node_executor/node_executor_unittest.cc  | 103 +++++++++++++++++++++
 7 files changed, 200 insertions(+), 44 deletions(-)
 create mode 100644 tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc

diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index 5337a0cf..b9536dba 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -1227,6 +1227,28 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
     hybrid_model_.known_shape_sub_models_.emplace(parent_node, ge_model);
   }
 
+  GE_CHK_STATUS_RET_NOLOG(InitHcclExecutorOnDemand(ge_model));
   return SUCCESS;
 }
 
+Status HybridModelBuilder::InitHcclExecutorOnDemand(const GeModelPtr &ge_model) {
+  if (NodeExecutorManager::GetInstance().IsExecutorInitialized(NodeExecutorManager::ExecutorType::HCCL)) {
+    return SUCCESS;
+  }
+
+  // HCCL tasks in a known-shaped subgraph that resides in a dynamic root graph
+  // still depend on the initialization of the HcclExecutor
+  auto tasks = ge_model->GetModelTaskDefPtr()->task();
+  for (int i = 0; i < tasks.size(); ++i) {
+    const domi::TaskDef &task_def = tasks[i];
+    auto task_type = static_cast(task_def.type());
+    if (task_type == RT_MODEL_TASK_HCCL) {
+      const NodeExecutor *unused = nullptr;
+      GE_CHK_STATUS_RET_NOLOG(NodeExecutorManager::GetInstance()
+                                  .GetOrCreateExecutor(NodeExecutorManager::ExecutorType::HCCL, 
&unused)); + return SUCCESS; + } + } return SUCCESS; } diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h index 92974441..9c1eb187 100644 --- a/ge/hybrid/model/hybrid_model_builder.h +++ b/ge/hybrid/model/hybrid_model_builder.h @@ -57,6 +57,7 @@ class HybridModelBuilder { Status ValidateParams(); Status LoadGraph(); Status LoadGeModel(ComputeGraph &graph, const GeModelPtr &ge_model); + static Status InitHcclExecutorOnDemand(const GeModelPtr &ge_model); Status LoadTask(NodeItem &node_item); Status LoadTasks(); Status IdentifyVariableOutputs(NodeItem &node_item, const ComputeGraphPtr &subgraph); diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc index 5f3d6e45..9e9354d9 100755 --- a/ge/hybrid/node_executor/node_executor.cc +++ b/ge/hybrid/node_executor/node_executor.cc @@ -58,8 +58,8 @@ Status NodeExecutor::CompileTask(const HybridModel &model, const NodePtr &node, } Status NodeExecutorManager::EnsureInitialized() { - GE_CHK_STATUS_RET(InitializeExecutors()); std::lock_guard lk(mu_); + ++ref_count_; if (initialized_) { return SUCCESS; } @@ -115,17 +115,14 @@ NodeExecutorManager::ExecutorType NodeExecutorManager::ResolveExecutorType(Node return it->second; } -Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) const { +Status NodeExecutorManager::GetExecutor(Node &node, const NodeExecutor **executor) { auto executor_type = ResolveExecutorType(node); + GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast(executor_type)); const auto it = executors_.find(executor_type); if (it == executors_.end()) { - REPORT_INNER_ERROR("E19999", "Failed to get executor by type: %d.", static_cast(executor_type)); - GELOGE(INTERNAL_ERROR, "[Check][ExecutorType]Failed to get executor by type: %d.", - static_cast(executor_type)); - return INTERNAL_ERROR; + return GetOrCreateExecutor(executor_type, executor); } - GELOGD("[%s] Set node executor by type: %d.", node.GetName().c_str(), static_cast(executor_type)); *executor = it->second.get(); return SUCCESS; } @@ -178,51 +175,55 @@ Status NodeExecutorManager::CalcOpRunningParam(Node &node) const { return OpsKernelBuilderManager::Instance().CalcOpRunningParam(node); } -Status NodeExecutorManager::InitializeExecutors() { +bool NodeExecutorManager::IsExecutorInitialized(NodeExecutorManager::ExecutorType executor_type) { + std::lock_guard lk(mu_); + return executors_.find(executor_type) != executors_.end(); +} + +Status NodeExecutorManager::GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **out_executor) { std::lock_guard lk(mu_); - if (executor_initialized_) { - ++ref_count_; - GELOGI("Executor is already initialized. 
add ref count to [%d]", ref_count_); + const auto executor_it = executors_.find(executor_type); + if (executor_it != executors_.end()) { + *out_executor = executor_it->second.get(); return SUCCESS; } - GELOGI("Start to Initialize NodeExecutors"); - for (auto &it : builders_) { - auto engine_type = it.first; - auto build_fn = it.second; - GE_CHECK_NOTNULL(build_fn); - auto executor = std::unique_ptr(build_fn()); - if (executor == nullptr) { - REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for engine type = %d", - static_cast(engine_type)); - GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast(engine_type)); - return INTERNAL_ERROR; - } + GELOGI("Start to Initialize NodeExecutor, type = %d", static_cast(executor_type)); + auto it = builders_.find(executor_type); + if (it == builders_.end()) { + REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d", + static_cast(executor_type)); + GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for executor type = %d", static_cast(executor_type)); + return INTERNAL_ERROR; + } - GELOGD("Executor of engine type = %d was created successfully", static_cast(engine_type)); - auto ret = executor->Initialize(); - if (ret != SUCCESS) { - REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast(engine_type)); - GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast(engine_type)); - for (auto &executor_it : executors_) { - executor_it.second->Finalize(); - } - executors_.clear(); - return ret; - } + auto build_fn = it->second; + GE_CHECK_NOTNULL(build_fn); + auto executor = std::unique_ptr(build_fn()); + if (executor == nullptr) { + REPORT_CALL_ERROR("E19999", "Create NodeExecutor failed for executor type = %d", + static_cast(executor_type)); + GELOGE(INTERNAL_ERROR, "[Create][NodeExecutor] failed for engine type = %d", static_cast(executor_type)); + return INTERNAL_ERROR; + } - executors_.emplace(engine_type, std::move(executor)); + GELOGD("Executor of engine type = %d was created successfully", static_cast(executor_type)); + auto ret = executor->Initialize(); + if (ret != SUCCESS) { + REPORT_CALL_ERROR("E19999", "Initialize NodeExecutor failed for type = %d", static_cast(executor_type)); + GELOGE(ret, "[Initialize][NodeExecutor] failed for type = %d", static_cast(executor_type)); + return ret; } - ++ref_count_; - executor_initialized_ = true; - GELOGI("Initializing NodeExecutors successfully."); + *out_executor = executor.get(); + executors_.emplace(executor_type, std::move(executor)); + GELOGI("Initializing NodeExecutor successfully, type = %d", static_cast(executor_type)); return SUCCESS; } void NodeExecutorManager::FinalizeExecutors() { std::lock_guard lk(mu_); - if (!executor_initialized_) { + if (ref_count_ <= 0) { GELOGD("No need for finalizing for not initialized."); return; } @@ -237,7 +238,6 @@ void NodeExecutorManager::FinalizeExecutors() { it.second->Finalize(); } executors_.clear(); - executor_initialized_ = false; GELOGD("Done invoking Finalize successfully."); } diff --git a/ge/hybrid/node_executor/node_executor.h b/ge/hybrid/node_executor/node_executor.h index fffd4e7d..3a6f656a 100644 --- a/ge/hybrid/node_executor/node_executor.h +++ b/ge/hybrid/node_executor/node_executor.h @@ -179,8 +179,6 @@ class NodeExecutorManager { */ Status EnsureInitialized(); - Status InitializeExecutors(); - void FinalizeExecutors(); /** @@ -196,7 +194,7 @@ class NodeExecutorManager { * @param executor executor * @return SUCCESS on success, error 
code otherwise */ - Status GetExecutor(Node &node, const NodeExecutor **executor) const; + Status GetExecutor(Node &node, const NodeExecutor **executor); /** * Resolve executor type by node @@ -205,13 +203,16 @@ class NodeExecutorManager { */ ExecutorType ResolveExecutorType(Node &node) const; + Status GetOrCreateExecutor(ExecutorType executor_type, const NodeExecutor **executor); + + bool IsExecutorInitialized(ExecutorType executor_type); + private: std::map> executors_; std::map> builders_; std::map engine_mapping_; std::mutex mu_; bool initialized_ = false; - bool executor_initialized_ = false; int ref_count_ = 0; }; diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index e0cab729..1743a0f0 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -850,6 +850,7 @@ set(HYBRID_TEST_FILES "hybrid/executor/subgraph_executor_unittest.cc" "hybrid/executor/worker/execution_engine_unittest.cc" "hybrid/model/hybrid_model_builder_unittest.cc" + "hybrid/node_executor/node_executor_unittest.cc" "hybrid/node_executor/rts/rts_node_task_unittest.cc" "hybrid/node_executor/host_cpu/host_cpu_node_task_unittest.cc" "hybrid/node_executor/ge_local/ge_local_node_executor_unittest.cc" @@ -857,6 +858,7 @@ set(HYBRID_TEST_FILES "hybrid/executor/hybrid_model_async_executor_unittest.cc" "hybrid/executor/hybrid_model_pipeline_executor_unittest.cc" "hybrid/node_executor/aicore/aicore_task_compiler_unittest.cc" + ) set(OTHERS_TEST_FILES diff --git a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc index 2ab82350..5567aca2 100644 --- a/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc +++ b/tests/ut/ge/hybrid/model/hybrid_model_builder_unittest.cc @@ -346,4 +346,31 @@ EXPECT_EQ(hybrid_model_builder.InitVariableTensors(), SUCCESS); EXPECT_EQ(hybrid_model_builder.hybrid_model_.variable_tensors_.size(), 1); HostMemManager::Instance().var_memory_base_map_.clear(); } + +TEST_F(UtestHybridModelBuilder, TestInitHcclExecutorOnDemand) { + NodeExecutorManager::GetInstance().builders_.erase(NodeExecutorManager::ExecutorType::HCCL); + // build aicore task + domi::ModelTaskDef model_task_def; + std::shared_ptr model_task_def_ptr = make_shared(model_task_def); + GeModelPtr ge_model = make_shared(); + ge_model->SetModelTaskDef(model_task_def_ptr); + + // No hccl task + domi::TaskDef *task_def = model_task_def_ptr->add_task(); + task_def->set_type(RT_MODEL_TASK_MEMCPY_ASYNC); + ASSERT_EQ(HybridModelBuilder::InitHcclExecutorOnDemand(ge_model), SUCCESS); + + // get executor failed due to no builder + task_def = model_task_def_ptr->add_task(); + task_def->set_type(RT_MODEL_TASK_HCCL); + ASSERT_EQ(HybridModelBuilder::InitHcclExecutorOnDemand(ge_model), INTERNAL_ERROR); + + // get executor success + REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::HCCL, NodeExecutor); + ASSERT_EQ(HybridModelBuilder::InitHcclExecutorOnDemand(ge_model), SUCCESS); + + // repeat get, do not access builder + NodeExecutorManager::GetInstance().builders_.erase(NodeExecutorManager::ExecutorType::HCCL); + ASSERT_EQ(HybridModelBuilder::InitHcclExecutorOnDemand(ge_model), SUCCESS); +} } // namespace ge diff --git a/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc b/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc new file mode 100644 index 00000000..8a1240d3 --- /dev/null +++ b/tests/ut/ge/hybrid/node_executor/node_executor_unittest.cc @@ -0,0 +1,103 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * 
Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#define private public
+#define protected public
+#include "hybrid/node_executor/node_executor.h"
+#undef protected
+#undef private
+
+using namespace std;
+using namespace testing;
+
+namespace ge {
+using namespace hybrid;
+
+namespace {
+  bool finalized = false;
+}
+
+class NodeExecutorTest : public testing::Test {
+ protected:
+  void SetUp() {}
+  void TearDown() { }
+};
+
+class FailureNodeExecutor : public NodeExecutor {
+ public:
+  Status Initialize() override {
+    return INTERNAL_ERROR;
+  }
+};
+
+class SuccessNodeExecutor : public NodeExecutor {
+ public:
+  Status Initialize() override {
+    initialized = true;
+    finalized = false;
+    return SUCCESS;
+  }
+
+  Status Finalize() override {
+    finalized = true;
+    return SUCCESS;
+  }
+
+  bool initialized = false;
+};
+
+REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICORE, FailureNodeExecutor);
+REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::AICPU_TF, SuccessNodeExecutor);
+
+TEST_F(NodeExecutorTest, TestGetOrCreateExecutor) {
+  auto &manager = NodeExecutorManager::GetInstance();
+  const NodeExecutor *executor = nullptr;
+  Status ret = SUCCESS;
+  // no builder
+  ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::RESERVED, &executor);
+  ASSERT_EQ(ret, INTERNAL_ERROR);
+  // initialize failure
+  ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICORE, &executor);
+  ASSERT_EQ(ret, INTERNAL_ERROR);
+  ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor);
+  ASSERT_EQ(ret, SUCCESS);
+  ASSERT_TRUE(executor != nullptr);
+  ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor);
+  ASSERT_EQ(ret, SUCCESS);
+  ASSERT_TRUE(executor != nullptr);
+  ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized);
+}
+
+TEST_F(NodeExecutorTest, TestInitAndFinalize) {
+  auto &manager = NodeExecutorManager::GetInstance();
+  manager.FinalizeExecutors();
+  manager.EnsureInitialized();
+  manager.EnsureInitialized();
+  const NodeExecutor *executor = nullptr;
+  auto ret = manager.GetOrCreateExecutor(NodeExecutorManager::ExecutorType::AICPU_TF, &executor);
+  ASSERT_EQ(ret, SUCCESS);
+  ASSERT_TRUE(executor != nullptr);
+  ASSERT_TRUE(((SuccessNodeExecutor*)executor)->initialized);
+  manager.FinalizeExecutors();
+  ASSERT_FALSE(manager.executors_.empty());
+  manager.FinalizeExecutors();
+  ASSERT_TRUE(manager.executors_.empty());
+  ASSERT_TRUE(finalized);
+}
+} // namespace ge

From 0cf2a5946364010ec659bd5dc24ae2e6ccb6aae8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E7=A3=8A?=
Date: Fri, 25 Jun 2021 14:37:56 +0800
Subject: [PATCH 51/51] update version of protobuf to v3.13.0

---
 cmake/external_libs/protobuf_static.cmake | 2 +-
 parser                                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/external_libs/protobuf_static.cmake b/cmake/external_libs/protobuf_static.cmake
index b8ff90bb..51f6ffbc 100755
--- 
a/cmake/external_libs/protobuf_static.cmake +++ b/cmake/external_libs/protobuf_static.cmake @@ -13,7 +13,7 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR endif() if(GE_PB_PKG) - set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.8.0.tar.gz") + set(REQ_URL "${GE_PB_PKG}/libs/protobuf/v3.13.0.tar.gz") else() if (ENABLE_GITEE) set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.13.0.tar.gz") diff --git a/parser b/parser index db5ce472..c074dfa5 160000 --- a/parser +++ b/parser @@ -1 +1 @@ -Subproject commit db5ce472de0086c3e2abdaab3b0685c1d2656c96 +Subproject commit c074dfa5960d67f2910122d46d4d264dd6554aad
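With protobuf lifted from v3.8.0 to v3.13.0 (and the parser submodule bumped to match), anything compiled against the new headers must also link the matching runtime, or failures surface in confusing ways later. Protobuf ships a startup check for exactly this situation; a minimal hedged example of wiring it into a tool's entry point (illustrative, not a GE source file):

    #include <google/protobuf/stubs/common.h>

    int main() {
      // Aborts with a descriptive message if the protobuf headers this file was
      // compiled against do not match the protobuf runtime that got linked in.
      GOOGLE_PROTOBUF_VERIFY_VERSION;
      return 0;
    }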