From: @shenwei41 Reviewed-by: @lilongfei15,@xsmq Signed-off-by: @xsmq tags/v1.2.0
@@ -2,6 +2,7 @@ | |||||
/build | /build | ||||
/output | /output | ||||
/prebuilts | /prebuilts | ||||
/cov | |||||
*.ir | *.ir | ||||
*.out | *.out | ||||
@@ -88,10 +88,8 @@ if (ENABLE_OPEN_SRC) | |||||
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | ||||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR} ${ASCEND_RUNTIME_DIR}) | ||||
if(PLATFORM STREQUAL "train") | if(PLATFORM STREQUAL "train") | ||||
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||||
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | ||||
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | ||||
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR}) | |||||
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | ||||
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | ||||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
@@ -101,12 +99,10 @@ if (ENABLE_OPEN_SRC) | |||||
elseif(PLATFORM STREQUAL "inference") | elseif(PLATFORM STREQUAL "inference") | ||||
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | ||||
find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | ||||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | ||||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | ||||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | ||||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
if(PRODUCT STREQUAL "flr3") | if(PRODUCT STREQUAL "flr3") | ||||
elseif(PRODUCT STREQUAL "flr1") | elseif(PRODUCT STREQUAL "flr1") | ||||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | ||||
@@ -116,17 +112,14 @@ if (ENABLE_OPEN_SRC) | |||||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | ||||
endif() | endif() | ||||
elseif(PLATFORM STREQUAL "all") | elseif(PLATFORM STREQUAL "all") | ||||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||||
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR}) | |||||
find_module(runtime libruntime.so ${ASCEND_ACL_DIR}) | |||||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
find_module(resource libresource.so ${ASCEND_ATC_DIR}) | |||||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||||
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR}) | |||||
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR}) | |||||
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR}) | |||||
find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | find_module(msprofiler_fwk_ext libmsprofiler_fwk.a ${ASCEND_RUNTIME_DIR}) | ||||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}) | |||||
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR}) | |||||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||||
find_module(msprofiler_ext libmsprofiler.a ${ASCEND_ACL_DIR}) | |||||
else() | else() | ||||
message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | message(STATUS "PLATFORM param is invalid, should be train or inference, you choose nothing!") | ||||
endif() | endif() | ||||
@@ -166,14 +166,14 @@ build_graphengine() | |||||
echo "execute command: cmake ${CMAKE_ARGS} .. failed." | echo "execute command: cmake ${CMAKE_ARGS} .. failed." | ||||
return 1 | return 1 | ||||
fi | fi | ||||
COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
COMMON_TARGET="ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt " | |||||
TARGET=${COMMON_TARGET} | TARGET=${COMMON_TARGET} | ||||
if [ "x${PLATFORM}" = "xtrain" ] | if [ "x${PLATFORM}" = "xtrain" ] | ||||
then | then | ||||
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder fwk_atc.bin ${TARGET}" | |||||
TARGET="ge_runner fwk_atc.bin ${TARGET}" | |||||
elif [ "x${PLATFORM}" = "xinference" ] | elif [ "x${PLATFORM}" = "xinference" ] | ||||
then | then | ||||
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
TARGET="ge_compiler atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
elif [ "X$ENABLE_GE_UT" = "Xon" ] | elif [ "X$ENABLE_GE_UT" = "Xon" ] | ||||
then | then | ||||
TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | TARGET="ut_libgraph ut_libge_multiparts_utest ut_libge_others_utest ut_libge_kernel_utest ut_libge_distinct_load_utest" | ||||
@@ -183,7 +183,7 @@ build_graphengine() | |||||
elif [ "x${PLATFORM}" = "xall" ] | elif [ "x${PLATFORM}" = "xall" ] | ||||
then | then | ||||
# build all the target | # build all the target | ||||
TARGET="" | |||||
TARGET="ge_runner ge_compiler fwk_atc.bin atc_atc.bin opensrc_ascendcl ${TARGET}" | |||||
fi | fi | ||||
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install | ||||
@@ -198,8 +198,6 @@ g++ -v | |||||
mk_dir ${OUTPUT_PATH} | mk_dir ${OUTPUT_PATH} | ||||
build_graphengine || { echo "GraphEngine build failed."; return; } | build_graphengine || { echo "GraphEngine build failed."; return; } | ||||
echo "---------------- GraphEngine build finished ----------------" | echo "---------------- GraphEngine build finished ----------------" | ||||
#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}" | |||||
#rm -rf "${OUTPUT_PATH}/"libproto* | |||||
rm -f ${OUTPUT_PATH}/libgmock*.so | rm -f ${OUTPUT_PATH}/libgmock*.so | ||||
rm -f ${OUTPUT_PATH}/libgtest*.so | rm -f ${OUTPUT_PATH}/libgtest*.so | ||||
rm -f ${OUTPUT_PATH}/lib*_stub.so | rm -f ${OUTPUT_PATH}/lib*_stub.so | ||||
@@ -209,10 +207,6 @@ find ${OUTPUT_PATH} -name "*.so*" -print0 | xargs -0 chmod 500 | |||||
echo "---------------- GraphEngine output generated ----------------" | echo "---------------- GraphEngine output generated ----------------" | ||||
# if [[ "X$ENABLE_GE_ST" = "Xon" ]]; then | |||||
# cp ${BUILD_PATH}/graphengine/tests/st/st_resnet50_train ${OUTPUT_PATH} | |||||
# fi | |||||
if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | ||||
cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/common/graph/ut_libgraph ${OUTPUT_PATH} | ||||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_multiparts_utest ${OUTPUT_PATH} | ||||
@@ -220,9 +214,6 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_others_utest ${OUTPUT_PATH} | ||||
cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | cp ${BUILD_PATH}/tests/ut/ge/ut_libge_kernel_utest ${OUTPUT_PATH} | ||||
# if [[ "X${ENABLE_GE_UT_ONLY_COMPILE}" != "Xon" ]]; then | |||||
# export LD_LIBRARY_PATH=${D_LINK_PATH}/x86_64/:${BUILD_PATH}../third_party/prebuild/x86_64/:${BUILD_PATH}/graphengine/:/usr/local/HiAI/driver/lib64:/usr/local/HiAI/runtime/lib64:${LD_LIBRARY_PATH} | |||||
# echo ${LD_LIBRARY_PATH} | |||||
${OUTPUT_PATH}/ut_libgraph && | ${OUTPUT_PATH}/ut_libgraph && | ||||
${OUTPUT_PATH}/ut_libge_multiparts_utest && | ${OUTPUT_PATH}/ut_libge_multiparts_utest && | ||||
${OUTPUT_PATH}/ut_libge_distinct_load_utest && | ${OUTPUT_PATH}/ut_libge_distinct_load_utest && | ||||
@@ -232,17 +223,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | echo "!!! UT FAILED, PLEASE CHECK YOUR CHANGES !!!" | ||||
exit 1; | exit 1; | ||||
fi | fi | ||||
# fi | |||||
# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then | |||||
echo "Generating coverage statistics, please wait..." | |||||
cd ${BASEPATH} | |||||
rm -rf ${BASEPATH}/cov | |||||
mkdir ${BASEPATH}/cov | |||||
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||||
cd ${BASEPATH}/cov | |||||
genhtml coverage.info | |||||
echo "Generating coverage statistics, please wait..." | |||||
cd ${BASEPATH} | |||||
rm -rf ${BASEPATH}/cov | |||||
mkdir ${BASEPATH}/cov | |||||
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info | |||||
lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info | |||||
cd ${BASEPATH}/cov | |||||
genhtml coverage.info | |||||
fi | fi | ||||
# generate output package in tar form, including ut/st libraries/executables | # generate output package in tar form, including ut/st libraries/executables | ||||
@@ -256,6 +244,8 @@ generate_package() | |||||
ATC_PATH="atc/lib64" | ATC_PATH="atc/lib64" | ||||
ATC_BIN_PATH="atc/bin" | ATC_BIN_PATH="atc/bin" | ||||
FWK_BIN_PATH="fwkacllib/bin" | FWK_BIN_PATH="fwkacllib/bin" | ||||
FWK_INCLUDE_PATH="fwkacllib/include" | |||||
ATC_INCLUDE_PATH="atc/include" | |||||
NNENGINE_PATH="plugin/nnengine/ge_config" | NNENGINE_PATH="plugin/nnengine/ge_config" | ||||
OPSKERNEL_PATH="plugin/opskernel" | OPSKERNEL_PATH="plugin/opskernel" | ||||
@@ -277,6 +267,8 @@ generate_package() | |||||
mk_dir "${OUTPUT_PATH}/${ACL_PATH}" | mk_dir "${OUTPUT_PATH}/${ACL_PATH}" | ||||
mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" | mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}" | ||||
mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | mk_dir "${OUTPUT_PATH}/${FWK_BIN_PATH}" | ||||
mk_dir "${OUTPUT_PATH}/${FWK_INCLUDE_PATH}" | |||||
mk_dir "${OUTPUT_PATH}/${ATC_INCLUDE_PATH}" | |||||
cd "${OUTPUT_PATH}" | cd "${OUTPUT_PATH}" | ||||
@@ -289,10 +281,10 @@ generate_package() | |||||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \; | ||||
MAX_DEPTH=1 | MAX_DEPTH=1 | ||||
if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||||
then | |||||
MAX_DEPTH=2 | |||||
fi | |||||
# if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ] | |||||
# then | |||||
# MAX_DEPTH=2 | |||||
# fi | |||||
for lib in "${PLUGIN_OPSKERNEL[@]}"; | for lib in "${PLUGIN_OPSKERNEL[@]}"; | ||||
do | do | ||||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \; | ||||
@@ -318,7 +310,15 @@ generate_package() | |||||
find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; | find ./lib/atclib -name atc.bin -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \; | ||||
find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; | find ./lib/fwkacl -name atc.bin -exec cp {} "${OUTPUT_PATH}/${FWK_BIN_PATH}" \; | ||||
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; | find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \; | ||||
cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${ATC_INCLUDE_PATH} | |||||
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${ATC_INCLUDE_PATH} | |||||
cp -r ${OUTPUT_PATH}/../inc/external/* ${ATC_INCLUDE_PATH} | |||||
cp -r ${OUTPUT_PATH}/../metadef/inc/external/* ${FWK_INCLUDE_PATH} | |||||
cp -r ${OUTPUT_PATH}/../parser/inc/external/* ${FWK_INCLUDE_PATH} | |||||
cp -r ${OUTPUT_PATH}/../inc/external/* ${FWK_INCLUDE_PATH} | |||||
if [ "x${PLATFORM}" = "xtrain" ] | if [ "x${PLATFORM}" = "xtrain" ] | ||||
then | then | ||||
tar -cf graphengine_lib.tar fwkacllib | tar -cf graphengine_lib.tar fwkacllib | ||||
@@ -339,4 +339,4 @@ then | |||||
find ./ -name graphengine_lib.tar -exec rm {} \; | find ./ -name graphengine_lib.tar -exec rm {} \; | ||||
tar -cf graphengine_lib.tar lib | tar -cf graphengine_lib.tar lib | ||||
fi | fi | ||||
echo "---------------- GraphEngine package archive generated ----------------" | |||||
echo "---------------- GraphEngine package archive generated ----------------" |
@@ -639,15 +639,6 @@ set(INFER_SRC_LIST | |||||
"graph/load/model_manager/task_info/model_exit_task_info.cc" | "graph/load/model_manager/task_info/model_exit_task_info.cc" | ||||
"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc" | ||||
"graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | "graph/load/model_manager/task_info/super_kernel/super_kernel.cc" | ||||
"single_op/task/op_task.cc" | |||||
"single_op/task/build_task_utils.cc" | |||||
"single_op/task/tbe_task_builder.cc" | |||||
"single_op/task/aicpu_task_builder.cc" | |||||
"single_op/task/aicpu_kernel_task_builder.cc" | |||||
"single_op/single_op.cc" | |||||
"single_op/single_op_model.cc" | |||||
"single_op/stream_resource.cc" | |||||
"single_op/single_op_manager.cc" | |||||
"hybrid/hybrid_davinci_model_stub.cc" | "hybrid/hybrid_davinci_model_stub.cc" | ||||
"ir_build/ge_ir_build.cc" | "ir_build/ge_ir_build.cc" | ||||
"ir_build/atc_ir_common.cc" | "ir_build/atc_ir_common.cc" | ||||
@@ -703,11 +694,13 @@ target_compile_definitions(ge_runner PRIVATE | |||||
FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
DAVINCI_CLOUD | DAVINCI_CLOUD | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_compile_options(ge_runner PRIVATE | target_compile_options(ge_runner PRIVATE | ||||
-O2 | -O2 | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | ||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | ||||
) | ) | ||||
@@ -738,6 +731,10 @@ target_include_directories(ge_runner SYSTEM PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(ge_runner PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_runner PRIVATE | target_link_libraries(ge_runner PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
adump_server | adump_server | ||||
@@ -772,11 +769,13 @@ target_compile_definitions(ge_compiler PRIVATE | |||||
FMK_HOST_INFER | FMK_HOST_INFER | ||||
COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_compile_options(ge_compiler PRIVATE | target_compile_options(ge_compiler PRIVATE | ||||
-O2 | -O2 | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | ||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | $<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | ||||
) | ) | ||||
@@ -807,6 +806,10 @@ target_include_directories(ge_compiler SYSTEM PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(ge_compiler PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_compiler PRIVATE | target_link_libraries(ge_compiler PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
static_mmpa | static_mmpa | ||||
@@ -868,6 +871,7 @@ target_compile_options(opensrc_ascendcl PRIVATE | |||||
-O2 | -O2 | ||||
-fvisibility=hidden | -fvisibility=hidden | ||||
) | ) | ||||
target_link_options(opensrc_ascendcl PRIVATE | target_link_options(opensrc_ascendcl PRIVATE | ||||
-rdynamic | -rdynamic | ||||
-Wl,--allow-multiple-definition | -Wl,--allow-multiple-definition | ||||
@@ -875,6 +879,7 @@ target_link_options(opensrc_ascendcl PRIVATE | |||||
-Wl,-Bsymbolic | -Wl,-Bsymbolic | ||||
-Wl,--exclude-libs,ALL | -Wl,--exclude-libs,ALL | ||||
) | ) | ||||
target_link_libraries(opensrc_ascendcl PRIVATE | target_link_libraries(opensrc_ascendcl PRIVATE | ||||
-Wl,--whole-archive | -Wl,--whole-archive | ||||
ge_executor | ge_executor | ||||
@@ -12,7 +12,7 @@ set(PROTO_LIST | |||||
"${METADEF_DIR}/proto/tensorflow/tensor.proto" | "${METADEF_DIR}/proto/tensorflow/tensor.proto" | ||||
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" | "${METADEF_DIR}/proto/tensorflow/tensor_shape.proto" | ||||
"${METADEF_DIR}/proto/tensorflow/types.proto" | "${METADEF_DIR}/proto/tensorflow/types.proto" | ||||
"${METADEF_DIR}/proto/tensorflow/versions.proto" | |||||
"${METADEF_DIR}/proto/tensorflow/versions.proto" | |||||
) | ) | ||||
protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST}) | ||||
@@ -73,6 +73,7 @@ target_compile_definitions(ge_common PRIVATE | |||||
FMK_SUPPORT_DUMP | FMK_SUPPORT_DUMP | ||||
OS_CENTOS | OS_CENTOS | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
@@ -105,6 +106,10 @@ target_include_directories(ge_common PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(ge_common PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_common PRIVATE | target_link_libraries(ge_common PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
static_mmpa | static_mmpa | ||||
@@ -132,6 +137,7 @@ target_compile_definitions(ge_common_static PRIVATE | |||||
$<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | $<IF:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,OS_TYPE=WIN,OS_TYPE=0> | ||||
$<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | $<$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>:SECUREC_USING_STD_SECURE_LIB=0 NOMINMAX> | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_compile_options(ge_common_static PRIVATE | target_compile_options(ge_common_static PRIVATE | ||||
@@ -181,6 +187,7 @@ target_compile_definitions(ge_common PRIVATE | |||||
OS_CENTOS | OS_CENTOS | ||||
google=ascend_private | google=ascend_private | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_compile_options(ge_common PRIVATE | target_compile_options(ge_common PRIVATE | ||||
@@ -208,6 +215,10 @@ target_include_directories(ge_common PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(ge_common PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_common PRIVATE | target_link_libraries(ge_common PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
ascend_protobuf_static | ascend_protobuf_static | ||||
@@ -598,7 +598,7 @@ bool ModelCacheHelper::IsAllocatedGraphIdSameAsCache(Json &json) const { | |||||
return false; | return false; | ||||
} | } | ||||
// Compare allocated graph id info between json and VarManager | // Compare allocated graph id info between json and VarManager | ||||
std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||||
std::map<std::string, uint32_t> allocated_graph_id; | |||||
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
@@ -667,7 +667,7 @@ bool ModelCacheHelper::IsChangedGraphIdSameAsCache(Json &json) const { | |||||
return false; | return false; | ||||
} | } | ||||
// Compare variable changed graph id info between json and VarManager | // Compare variable changed graph id info between json and VarManager | ||||
std::unordered_map<std::string, uint32_t> changed_graph_id; | |||||
std::map<std::string, uint32_t> changed_graph_id; | |||||
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse ChangedGraphId from Json."); | GELOGW("Fail to parse ChangedGraphId from Json."); | ||||
@@ -732,7 +732,7 @@ bool ModelCacheHelper::IsVarAddrMgrMapSameAsCache(Json &json) const { | |||||
} | } | ||||
// Compare variable address info between json and VarManager | // Compare variable address info between json and VarManager | ||||
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | ||||
std::unordered_set<uint64_t> var_offset_set; | |||||
std::set<uint64_t> var_offset_set; | |||||
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse VarAddrMgrMap from Json."); | GELOGW("Fail to parse VarAddrMgrMap from Json."); | ||||
@@ -942,7 +942,7 @@ Status ModelCacheHelper::RecoverAllocatedGraphId(const Json &json) const { | |||||
GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
std::unordered_map<std::string, uint32_t> allocated_graph_id; | |||||
std::map<std::string, uint32_t> allocated_graph_id; | |||||
auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | auto ret = ParseAllocatedGraphIdFromJson(json, allocated_graph_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
@@ -963,7 +963,7 @@ Status ModelCacheHelper::RecoverChangedGraphId(const Json &json) const { | |||||
GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
std::unordered_map<std::string, uint32_t> changed_graph_id; | |||||
std::map<std::string, uint32_t> changed_graph_id; | |||||
auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | auto ret = ParseChangedGraphIdFromJson(json, changed_graph_id); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse AllocatedGraphId from Json."); | GELOGW("Fail to parse AllocatedGraphId from Json."); | ||||
@@ -985,7 +985,7 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const { | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | std::vector<std::pair<std::string, VarAddrMgr>> var_addr_mgr_vector; | ||||
std::unordered_set<uint64_t> var_offset_set; | |||||
std::set<uint64_t> var_offset_set; | |||||
auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | auto ret = ParseVarAddrMgrMapFromJson(json, var_addr_mgr_vector, var_offset_set); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGW("Fail to parse VarAddrMgrMap from Json."); | GELOGW("Fail to parse VarAddrMgrMap from Json."); | ||||
@@ -1508,7 +1508,7 @@ Status ModelCacheHelper::ParseMemResourceFromJson(const Json &json, map<rtMemTyp | |||||
Status ModelCacheHelper::ParseVarAddrMgrMapFromJson( | Status ModelCacheHelper::ParseVarAddrMgrMapFromJson( | ||||
const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | const Json &json, std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | ||||
std::unordered_set<uint64_t> &var_offset_set) { | |||||
std::set<uint64_t> &var_offset_set) { | |||||
if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
@@ -1606,7 +1606,7 @@ Status ModelCacheHelper::ParseTransRoadsFromJson( | |||||
} | } | ||||
Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | ||||
std::unordered_map<std::string, uint32_t> &changed_graph_id) { | |||||
std::map<std::string, uint32_t> &changed_graph_id) { | |||||
if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
@@ -1624,7 +1624,7 @@ Status ModelCacheHelper::ParseChangedGraphIdFromJson(const Json &json, | |||||
} | } | ||||
Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, | Status ModelCacheHelper::ParseAllocatedGraphIdFromJson(const Json &json, | ||||
std::unordered_map<std::string, uint32_t> &allocated_graph_id) { | |||||
std::map<std::string, uint32_t> &allocated_graph_id) { | |||||
if (!(json.is_array() || json.is_null())) { | if (!(json.is_array() || json.is_null())) { | ||||
GELOGW("Input param json type should be null or array."); | GELOGW("Input param json type should be null or array."); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
@@ -95,15 +95,15 @@ class ModelCacheHelper { | |||||
static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource); | static Status ParseMemResourceFromJson(const Json &json, map<rtMemType_t, int64_t> &mem_resource); | ||||
static Status ParseVarAddrMgrMapFromJson(const Json &json, | static Status ParseVarAddrMgrMapFromJson(const Json &json, | ||||
std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | std::vector<std::pair<std::string, VarAddrMgr>> &var_addr_mgr_vector, | ||||
std::unordered_set<uint64_t> &var_offset_set); | |||||
std::set<uint64_t> &var_offset_set); | |||||
static Status ParseCurVarTensorDescMapFromJson( | static Status ParseCurVarTensorDescMapFromJson( | ||||
const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map); | const Json &json, std::unordered_map<std::string, ge::GeTensorDesc> &cur_var_tensor_desc_map); | ||||
static Status ParseTransRoadsFromJson(const Json &json, | static Status ParseTransRoadsFromJson(const Json &json, | ||||
std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads); | std::unordered_map<std::string, std::vector<TransNodeInfo>> &trans_roads); | ||||
static Status ParseChangedGraphIdFromJson(const Json &json, | static Status ParseChangedGraphIdFromJson(const Json &json, | ||||
std::unordered_map<std::string, uint32_t> &changed_graph_id); | |||||
std::map<std::string, uint32_t> &changed_graph_id); | |||||
static Status ParseAllocatedGraphIdFromJson(const Json &json, | static Status ParseAllocatedGraphIdFromJson(const Json &json, | ||||
std::unordered_map<std::string, uint32_t> &allocated_graph_id); | |||||
std::map<std::string, uint32_t> &allocated_graph_id); | |||||
static Status ParseBroadcastInfoFromJson(const Json &json, | static Status ParseBroadcastInfoFromJson(const Json &json, | ||||
std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info); | std::unordered_map<std::string, VarBroadCastInfo> &var_broadcast_info); | ||||
static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); | static Status GetVarNameFromVarKey(const string &var_key, const GeTensorDesc &tensor_desc, string &var_name); | ||||
@@ -88,7 +88,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { | |||||
return false; | return false; | ||||
} | } | ||||
std::unordered_set<uint32_t> record; | |||||
std::set<uint32_t> record; | |||||
for (size_t i = 0; i < device_nums; ++i) { | for (size_t i = 0; i < device_nums; ++i) { | ||||
uint32_t dev_id = deviceid_list[i]; | uint32_t dev_id = deviceid_list[i]; | ||||
if (dev_id >= static_cast<uint32_t>(dev_count)) { | if (dev_id >= static_cast<uint32_t>(dev_count)) { | ||||
@@ -167,6 +167,8 @@ target_compile_options(ge_executor PRIVATE | |||||
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | $<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common> | ||||
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd> | $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Debug>>:/MTd> | ||||
$<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT> | $<$<AND:$<STREQUAL:${TARGET_SYSTEM_NAME},Windows>,$<STREQUAL:${CMAKE_CONFIGURATION_TYPES},Release>>:/MT> | ||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-variable> | |||||
$<$<STREQUAL:${CMAKE_CXX_COMPILER_VERSION},7.3.0>:-Werror=unused-const-variable -Werror=format> | |||||
) | ) | ||||
target_compile_definitions(ge_executor PRIVATE | target_compile_definitions(ge_executor PRIVATE | ||||
@@ -178,7 +180,7 @@ target_compile_definitions(ge_executor PRIVATE | |||||
LOG_CPP | LOG_CPP | ||||
) | ) | ||||
target_include_directories(ge_executor PRIVATE | |||||
target_include_directories(ge_executor SYSTEM PRIVATE | |||||
${GE_CODE_DIR}/ge | ${GE_CODE_DIR}/ge | ||||
${GE_CODE_DIR}/inc | ${GE_CODE_DIR}/inc | ||||
${GE_CODE_DIR}/inc/external | ${GE_CODE_DIR}/inc/external | ||||
@@ -212,12 +214,14 @@ target_compile_options(ge_executor_shared PRIVATE | |||||
-Werror | -Werror | ||||
-O2 | -O2 | ||||
-Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(ge_executor_shared PRIVATE | target_compile_definitions(ge_executor_shared PRIVATE | ||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
DAVINCI_SUPPORT_PROFILING | DAVINCI_SUPPORT_PROFILING | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(ge_executor_shared PRIVATE | target_include_directories(ge_executor_shared PRIVATE | ||||
@@ -238,6 +242,10 @@ target_include_directories(ge_executor_shared PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(ge_executor_shared PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_executor_shared PRIVATE | target_link_libraries(ge_executor_shared PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
msprofiler | msprofiler | ||||
@@ -27,10 +27,12 @@ add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
target_compile_options(ge_local_engine PRIVATE | target_compile_options(ge_local_engine PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(ge_local_engine PRIVATE | target_compile_definitions(ge_local_engine PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(ge_local_engine PRIVATE | target_include_directories(ge_local_engine PRIVATE | ||||
@@ -51,6 +53,10 @@ target_include_directories(ge_local_engine PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(ge_local_engine PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_local_engine PRIVATE | target_link_libraries(ge_local_engine PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -67,11 +73,12 @@ add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
target_compile_options(atc_ge_local_engine PRIVATE | target_compile_options(atc_ge_local_engine PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(atc_ge_local_engine PRIVATE | target_compile_definitions(atc_ge_local_engine PRIVATE | ||||
COMPILE_OMG_PACKAGE | |||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(atc_ge_local_engine PRIVATE | target_include_directories(atc_ge_local_engine PRIVATE | ||||
@@ -92,6 +99,10 @@ target_include_directories(atc_ge_local_engine PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(atc_ge_local_engine PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(atc_ge_local_engine PRIVATE | target_link_libraries(atc_ge_local_engine PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -113,10 +124,12 @@ add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDR | |||||
target_compile_options(ge_local_opskernel_builder PRIVATE | target_compile_options(ge_local_opskernel_builder PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(ge_local_opskernel_builder PRIVATE | target_compile_definitions(ge_local_opskernel_builder PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(ge_local_opskernel_builder PRIVATE | target_include_directories(ge_local_opskernel_builder PRIVATE | ||||
@@ -137,6 +150,10 @@ target_include_directories(ge_local_opskernel_builder PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(ge_local_opskernel_builder PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_local_opskernel_builder PRIVATE | target_link_libraries(ge_local_opskernel_builder PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -154,10 +171,12 @@ add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO | |||||
target_compile_options(atc_ge_local_opskernel_builder PRIVATE | target_compile_options(atc_ge_local_opskernel_builder PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | target_compile_definitions(atc_ge_local_opskernel_builder PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(atc_ge_local_opskernel_builder PRIVATE | target_include_directories(atc_ge_local_opskernel_builder PRIVATE | ||||
@@ -178,6 +197,10 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(atc_ge_local_opskernel_builder PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(atc_ge_local_opskernel_builder PRIVATE | target_link_libraries(atc_ge_local_opskernel_builder PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -200,11 +223,13 @@ add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PR | |||||
target_compile_options(ge_local_opskernel_builder_static PRIVATE | target_compile_options(ge_local_opskernel_builder_static PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | target_compile_definitions(ge_local_opskernel_builder_static PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(ge_local_opskernel_builder_static PRIVATE | target_include_directories(ge_local_opskernel_builder_static PRIVATE | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #ifndef GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | ||||
#define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #define GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <memory> | #include <memory> | ||||
#include <string> | #include <string> | ||||
@@ -32,7 +46,7 @@ namespace ge_local { | |||||
* ge local engine. | * ge local engine. | ||||
* Used for the ops not belong to any engine. eg:netoutput | * Used for the ops not belong to any engine. eg:netoutput | ||||
*/ | */ | ||||
class GeLocalEngine { | |||||
class GE_FUNC_VISIBILITY GeLocalEngine { | |||||
public: | public: | ||||
/** | /** | ||||
* get GeLocalEngine instance. | * get GeLocalEngine instance. | ||||
@@ -94,25 +108,25 @@ extern "C" { | |||||
* When Ge start, GE will invoke this interface | * When Ge start, GE will invoke this interface | ||||
* @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
*/ | */ | ||||
ge::Status Initialize(const map<string, string> &options); | |||||
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||||
/** | /** | ||||
* After the initialize, GE will invoke this interface to get the Ops kernel Store | * After the initialize, GE will invoke this interface to get the Ops kernel Store | ||||
* @param ops_kernel_map The ge local's ops kernel info | * @param ops_kernel_map The ge local's ops kernel info | ||||
*/ | */ | ||||
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
/** | /** | ||||
* After the initialize, GE will invoke this interface to get the Graph Optimizer | * After the initialize, GE will invoke this interface to get the Graph Optimizer | ||||
* @param graph_optimizers The ge local's Graph Optimizer objs | * @param graph_optimizers The ge local's Graph Optimizer objs | ||||
*/ | */ | ||||
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
/** | /** | ||||
* When the graph finished, GE will invoke this interface | * When the graph finished, GE will invoke this interface | ||||
* @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
*/ | */ | ||||
ge::Status Finalize(); | |||||
GE_FUNC_VISIBILITY ge::Status Finalize(); | |||||
} | } | ||||
#endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ | #endif // GE_GE_LOCAL_ENGINE_ENGINE_GE_LOCAL_ENGINE_H_ |
@@ -16,6 +16,20 @@ | |||||
#ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #ifndef GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
#define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #define GE_GE_LOCAL_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <mutex> | #include <mutex> | ||||
#include "framework/common/ge_inner_error_codes.h" | #include "framework/common/ge_inner_error_codes.h" | ||||
#include "graph/node.h" | #include "graph/node.h" | ||||
@@ -23,7 +37,7 @@ | |||||
#include "external/../register/register.h" | #include "external/../register/register.h" | ||||
namespace ge { | namespace ge { | ||||
class HostCpuEngine { | |||||
class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
public: | public: | ||||
~HostCpuEngine() = default; | ~HostCpuEngine() = default; | ||||
@@ -22,7 +22,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace ge_local { | namespace ge_local { | ||||
class GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||||
class GE_FUNC_VISIBILITY GeLocalOpsKernelBuilder : public OpsKernelBuilder { | |||||
public: | public: | ||||
~GeLocalOpsKernelBuilder() override; | ~GeLocalOpsKernelBuilder() override; | ||||
Status Initialize(const map<std::string, std::string> &options) override; | Status Initialize(const map<std::string, std::string> &options) override; | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | #ifndef GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | ||||
#define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | #define GE_GE_LOCAL_ENGINE_OPS_KERNEL_STORE_GE_LOCAL_OPS_KERNEL_INFO_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
@@ -25,7 +39,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace ge_local { | namespace ge_local { | ||||
class GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
class GE_FUNC_VISIBILITY GeLocalOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
public: | public: | ||||
GeLocalOpsKernelInfoStore() = default; | GeLocalOpsKernelInfoStore() = default; | ||||
@@ -21,7 +21,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace ge_local { | namespace ge_local { | ||||
class GeDeletedOp : public Op { | |||||
class GE_FUNC_VISIBILITY GeDeletedOp : public Op { | |||||
public: | public: | ||||
GeDeletedOp(const Node &node, RunContext &run_context); | GeDeletedOp(const Node &node, RunContext &run_context); | ||||
@@ -21,7 +21,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace ge_local { | namespace ge_local { | ||||
class NoOp : public Op { | |||||
class GE_FUNC_VISIBILITY NoOp : public Op { | |||||
public: | public: | ||||
NoOp(const Node &node, RunContext &run_context); | NoOp(const Node &node, RunContext &run_context); | ||||
@@ -29,7 +29,7 @@ namespace ge_local { | |||||
/** | /** | ||||
* The base class for all op. | * The base class for all op. | ||||
*/ | */ | ||||
class Op { | |||||
class GE_FUNC_VISIBILITY Op { | |||||
public: | public: | ||||
Op(const Node &node, RunContext &run_context); | Op(const Node &node, RunContext &run_context); | ||||
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||||
/** | /** | ||||
* manage all the op, support create op. | * manage all the op, support create op. | ||||
*/ | */ | ||||
class OpFactory { | |||||
class GE_FUNC_VISIBILITY OpFactory { | |||||
public: | public: | ||||
static OpFactory &Instance(); | static OpFactory &Instance(); | ||||
@@ -72,7 +72,7 @@ class OpFactory { | |||||
std::vector<std::string> all_ops_; | std::vector<std::string> all_ops_; | ||||
}; | }; | ||||
class OpRegistrar { | |||||
class GE_FUNC_VISIBILITY OpRegistrar { | |||||
public: | public: | ||||
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | ||||
OpFactory::Instance().RegisterCreator(type, func); | OpFactory::Instance().RegisterCreator(type, func); | ||||
@@ -27,7 +27,7 @@ target_compile_options(ge_runtime PRIVATE | |||||
-fno-common | -fno-common | ||||
) | ) | ||||
target_compile_definitions(ge_runtime PRIVATE | |||||
target_compile_definitions(ge_runtime PRIVATE | |||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
LOG_CPP | LOG_CPP | ||||
) | ) | ||||
@@ -53,6 +53,10 @@ target_include_directories(ge_runtime PRIVATE | |||||
${CMAKE_BINARY_DIR}/proto/ge | ${CMAKE_BINARY_DIR}/proto/ge | ||||
) | ) | ||||
target_link_options(ge_runtime PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(ge_runtime PRIVATE | target_link_libraries(ge_runtime PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -1121,7 +1121,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
} | } | ||||
reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
reusable_block->ref_count_++; | |||||
reusable_blocks_[memory_type][stream_id].erase((++it).base()); | reusable_blocks_[memory_type][stream_id].erase((++it).base()); | ||||
return reusable_block; | return reusable_block; | ||||
} | } | ||||
@@ -1136,7 +1135,6 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | ||||
block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
block->ref_count_++; | |||||
block->continuous_block_ = continuous; | block->continuous_block_ = continuous; | ||||
block->batch_label_ = batch_label; | block->batch_label_ = batch_label; | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
@@ -1266,6 +1264,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in | |||||
// hccl task need align header and tail | // hccl task need align header and tail | ||||
block->first_continuous_block_ = true; | block->first_continuous_block_ = true; | ||||
block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
++(block->ref_count_); | |||||
} else { | } else { | ||||
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
@@ -1289,6 +1288,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
return nullptr, "Get no align size failed"); | return nullptr, "Get no align size failed"); | ||||
std::string symbol; | std::string symbol; | ||||
bool reuse_input = false; | |||||
if (IsSymbolExist(node_index_io, symbol)) { | if (IsSymbolExist(node_index_io, symbol)) { | ||||
block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | ||||
@@ -1303,6 +1303,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
block->SetLifeTimeEnd(life_time_); | block->SetLifeTimeEnd(life_time_); | ||||
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | ||||
block->ref_count_++; | block->ref_count_++; | ||||
reuse_input = true; | |||||
// add new size | // add new size | ||||
align_size = block_size; | align_size = block_size; | ||||
@@ -1336,7 +1337,6 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type); | ||||
} | } | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr, return nullptr, "Block is nullptr."); | ||||
int out_count_reuse_input = block->ref_count_; | |||||
int out_count = 0; | int out_count = 0; | ||||
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(), GELOGE(FAILED, "index is out of range."); return nullptr); | ||||
auto out_data_anchor = n->GetOutDataAnchor(index); | auto out_data_anchor = n->GetOutDataAnchor(index); | ||||
@@ -1351,28 +1351,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
out_count++; | out_count++; | ||||
} | } | ||||
} | } | ||||
bool reuse_input = false; | |||||
for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) { | |||||
auto owner_node = in_anchor->GetOwnerNode(); | |||||
GE_IF_BOOL_EXEC(owner_node == nullptr, continue); | |||||
auto op_desc = owner_node->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(op_desc == nullptr, continue); | |||||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | |||||
bool dst_reuse_input = false; | |||||
uint32_t dst_reuse_input_index = 0; | |||||
auto owner_node_op_desc = op_desc->GetOutputDescPtr(i); | |||||
GE_IF_BOOL_EXEC(owner_node_op_desc == nullptr, continue); | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(*owner_node_op_desc, dst_reuse_input) != SUCCESS, | |||||
GELOGI("Get dst_reuse_input failed")); | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | |||||
GELOGI("Get dst_reuse_input_index failed")); | |||||
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | |||||
out_count_reuse_input += 1; | |||||
reuse_input = true; | |||||
} | |||||
} | |||||
} | |||||
block->ref_count_ = reuse_input ? out_count_reuse_input + out_count - 1 : out_count; | |||||
block->ref_count_ = (reuse_input && out_count != 0) ? (block->ref_count_ + out_count - 1) | |||||
: (block->ref_count_ + out_count); | |||||
return block; | return block; | ||||
} | } | ||||
@@ -1484,12 +1464,25 @@ void BlockMemAssigner::ReleaseInputNodeOutMemory(const unordered_map<string, vec | |||||
GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index, | GELOGD("node_type_indexs: %d, %s", node_type_indexs.back().index, | ||||
node_type_indexs.back().node->GetName().c_str()); | node_type_indexs.back().node->GetName().c_str()); | ||||
if ((node_type_indexs.back().node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||||
(node_type_indexs.back().index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx()))) { | |||||
bool is_block_matched = false; | |||||
for (auto &node_type_index : node_type_indexs) { | |||||
is_block_matched = (node_type_index.node == in_anchor->GetPeerOutAnchor()->GetOwnerNode()) && | |||||
(node_type_index.index == static_cast<uint32_t>(in_anchor->GetPeerOutAnchor()->GetIdx())); | |||||
if (is_block_matched) { | |||||
GELOGI("Block of peer out is matched. Peer node:%s, output index:%u, " | |||||
"current node:%s, input index:%d, block ref_count:%d.", | |||||
node_type_index.node->GetName().c_str(), node_type_index.index, | |||||
node->GetName().c_str(), in_anchor->GetIdx(), block->ref_count_); | |||||
break; | |||||
} | |||||
} | |||||
if (is_block_matched) { | |||||
ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); | ReleaseMemory(block, reusable_memory, (node->GetOpDesc()->GetStreamId() == block->stream_id_)); | ||||
if (block->ref_count_ == 0 && block->same_stream_) { | if (block->ref_count_ == 0 && block->same_stream_) { | ||||
SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); | SetLastUsedInputMemAttr(node, in_anchor->GetIdx()); | ||||
} | } | ||||
break; | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -1530,6 +1523,21 @@ void CheckAndGetOpReuseEnv(const string &env, vector<string> &env_vec, bool &op_ | |||||
return; | return; | ||||
} | } | ||||
void BlockMemAssigner::CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block) { | |||||
if (node == nullptr || node->GetOpDesc() == nullptr || block == nullptr) { | |||||
return; | |||||
} | |||||
int64_t stream_id = node->GetOpDesc()->GetStreamId(); | |||||
auto out_data_anchor = node->GetOutDataAnchor(static_cast<int>(idx)); | |||||
bool is_suspended = (out_data_anchor != nullptr) && (out_data_anchor->GetPeerInDataNodesSize() == 0); | |||||
if (is_suspended) { | |||||
block->ref_count_ = (block->ref_count_ != 0) ? (block->ref_count_) : (1); | |||||
stream_workspace_blocks_[block->memory_type_][stream_id].emplace_back(block); | |||||
GELOGI("The output is suspended, and will be released in allocation of next node. Name:%s, index:%u, " | |||||
"size:%zu, ref_count:%d.", node->GetName().c_str(), idx, block->Size(), block->ref_count_); | |||||
} | |||||
} | |||||
Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector<int64_t> &ranges) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
int64_t stream_id = op_desc->GetStreamId(); | int64_t stream_id = op_desc->GetStreamId(); | ||||
@@ -1560,7 +1568,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
// Allocate memory for the current node and release node memory of the same size in the workspace | // Allocate memory for the current node and release node memory of the same size in the workspace | ||||
GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | GE_IF_BOOL_EXEC(ge_disable_reuse_mem_env_ != "1", | ||||
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | ||||
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | |||||
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); | |||||
iter->second[stream_id].clear();}); | |||||
if (IsContinuousOutput(node)) { | if (IsContinuousOutput(node)) { | ||||
return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | ||||
} | } | ||||
@@ -1621,6 +1630,8 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
continue; | continue; | ||||
} | } | ||||
symbol_blocks_[iter->second] = mem_block; | symbol_blocks_[iter->second] = mem_block; | ||||
// The output is suspended, and will be released in allocation of next node. | |||||
CheckAndReleaseSuspendedBlock(node, i, mem_block); | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -1648,9 +1659,6 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { | if (AssignOutputMemoryWithReuse(n, ranges) != SUCCESS) { | ||||
return; | return; | ||||
} | } | ||||
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); ++iter) { | |||||
iter->second[stream_id].clear(); | |||||
} | |||||
vector<int64_t> temp; | vector<int64_t> temp; | ||||
int64_t tatal_size = 0; | int64_t tatal_size = 0; | ||||
GetNodeWorkSpaceSize(n, temp, tatal_size); | GetNodeWorkSpaceSize(n, temp, tatal_size); | ||||
@@ -1692,6 +1700,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||||
kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag, | kWorkspace, n, static_cast<uint32_t>(i), workspace_reuse_flag, | ||||
is_op_reuse_mem_, false, memory_type); | is_op_reuse_mem_, false, memory_type); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mem_block == nullptr, continue, "failed to apply memory block."); | ||||
++(mem_block->ref_count_); | |||||
CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); | CheckWorkspaceReuse(workspace_reuse_flag, i, stream_id, mem_block, memory_type); | ||||
} | } | ||||
for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | for (auto it = reusable_blocks_.begin(); it != reusable_blocks_.end(); ++it) { | ||||
@@ -454,6 +454,8 @@ class BlockMemAssigner : public MemAssigner { | |||||
void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | ||||
void CheckAndReleaseSuspendedBlock(const NodePtr &node, uint32_t idx, MemoryBlock *block); | |||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | ||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | ||||
@@ -464,7 +466,7 @@ class BlockMemAssigner : public MemAssigner { | |||||
std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_; | std::unordered_map<std::string, std::unordered_map<uint32_t, MemoryBlock *>> node_continuous_input_blocks_; | ||||
std::unordered_map<std::string, uint32_t> node_continuous_input_counts_; | |||||
std::map<std::string, uint32_t> node_continuous_input_counts_; | |||||
// reuse memory | // reuse memory | ||||
vector<string> op_no_reuse_mem_vec_; | vector<string> op_no_reuse_mem_vec_; | ||||
@@ -528,7 +528,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||||
GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | GELOGI("[IMAS]Continuous input : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld] " | ||||
"size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | "size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | ||||
node->GetType().c_str(), peer_op_desc->GetName().c_str(),peer_out_data_anchor->GetIdx(), | |||||
peer_op_desc->GetName().c_str(), node->GetType().c_str(), peer_out_data_anchor->GetIdx(), | |||||
output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), memory_type, | ||||
is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | is_continuous_input_allocated ? 0UL : align_size, real_size, is_nopadding); | ||||
} | } | ||||
@@ -618,7 +618,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node | |||||
} | } | ||||
GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" | GELOGI("[IMAS]Continuous output : Set %s name[%s] optype[%s] output[%d] offset to [%zu] stream_id[%ld] memtype[%ld]" | ||||
" size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | " size[%zu] realsize[%ld] nopadding[%d].", node->GetOwnerComputeGraph()->GetName().c_str(), | ||||
node->GetType().c_str(), out_op_desc->GetName().c_str(), out_data_anchor->GetIdx(), | |||||
out_op_desc->GetName().c_str(), node->GetType().c_str(), out_data_anchor->GetIdx(), | |||||
output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, | output_list[out_data_anchor->GetIdx()], out_op_desc->GetStreamId(), memory_type, 0UL, | ||||
is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); | is_nopadding ? nopadding_size : tensor_desc_size, is_nopadding); | ||||
} | } | ||||
@@ -90,7 +90,7 @@ Status RunContextUtil::CreateRtModelResources(uint32_t stream_num, uint32_t even | |||||
// Create rt label | // Create rt label | ||||
for (uint32_t i = 0; i < label_num; ++i) { | for (uint32_t i = 0; i < label_num; ++i) { | ||||
rtLabel_t label = nullptr; | rtLabel_t label = nullptr; | ||||
rt_ret = rtLabelCreate(&label); | |||||
rt_ret = rtLabelCreateV2(&label, rt_model_); | |||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i); | GELOGE(RT_FAILED, "rtLabelCreate failed. rt_ret = %d, index = %u", static_cast<int>(rt_ret), i); | ||||
return RT_FAILED; | return RT_FAILED; | ||||
@@ -1226,7 +1226,7 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
} | } | ||||
} | } | ||||
Status status = ReorderEventNodes(); | |||||
Status status = whole_graph_->InsertGraphEvents(); | |||||
if (status != SUCCESS) { | if (status != SUCCESS) { | ||||
GELOGE(status, "Graph ReorderEventNodes failed"); | GELOGE(status, "Graph ReorderEventNodes failed"); | ||||
return status; | return status; | ||||
@@ -1235,22 +1235,6 @@ Status StreamAllocator::InsertSyncEventNodes() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status StreamAllocator::ReorderEventNodes() const { | |||||
Status status = whole_graph_->InsertEventNodes(); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Whole graph InsertEventNodes failed"); | |||||
return status; | |||||
} | |||||
for (const auto &subgraph : whole_graph_->GetAllSubgraphs()) { | |||||
status = subgraph->InsertEventNodes(); | |||||
if (status != SUCCESS) { | |||||
GELOGE(status, "Subgraph %s InsertEventNodes failed", subgraph->GetName().c_str()); | |||||
return status; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
void StreamAllocator::DumpEvents() { | void StreamAllocator::DumpEvents() { | ||||
map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | map<int64_t, vector<NodePtr>> after_refresh_stream_nodes; | ||||
for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | for (const auto &node : whole_graph_->GetNodes(whole_graph_->GetGraphUnknownFlag())) { | ||||
@@ -74,7 +74,6 @@ class StreamAllocator { | |||||
Status RefreshContinuousEvents(); | Status RefreshContinuousEvents(); | ||||
Status InsertSyncEventNodes(); | Status InsertSyncEventNodes(); | ||||
Status ReorderEventNodes() const; | |||||
void DumpEvents(); | void DumpEvents(); | ||||
@@ -211,7 +211,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||||
// and it have no attr or group attr different | // and it have no attr or group attr different | ||||
// which means bad case, return error | // which means bad case, return error | ||||
bool call_check = true; | bool call_check = true; | ||||
std::unordered_set<int64_t> input_group_ids; | |||||
std::set<int64_t> input_group_ids; | |||||
for (const auto &input_node : node->GetInNodes()) { | for (const auto &input_node : node->GetInNodes()) { | ||||
auto iter = nodes_with_group_attr.find(input_node); | auto iter = nodes_with_group_attr.find(input_node); | ||||
if (iter == nodes_with_group_attr.end()) { | if (iter == nodes_with_group_attr.end()) { | ||||
@@ -533,13 +533,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
return GE_GRAPH_GRAPH_NODE_NULL; | return GE_GRAPH_GRAPH_NODE_NULL; | ||||
} | } | ||||
int64_t node_index = 0; | |||||
for (auto &node : all_nodes) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
op_desc->SetId(node_index++); | |||||
} | |||||
map<int64_t, vector<OpDescPtr>> all_stream_ops; | map<int64_t, vector<OpDescPtr>> all_stream_ops; | ||||
for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -784,7 +777,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin | |||||
} | } | ||||
if (graph->GetNeedIteration()) { | if (graph->GetNeedIteration()) { | ||||
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") { | |||||
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) { | |||||
profiling_point.end_index.insert(current_idx); | profiling_point.end_index.insert(current_idx); | ||||
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive", | ||||
op_desc->GetName().c_str(), current_idx); | op_desc->GetName().c_str(), current_idx); | ||||
@@ -44,7 +44,7 @@ class GE_FUNC_HOST_VISIBILITY GE_FUNC_DEV_VISIBILITY TransOpUtil { | |||||
static TransOpUtil &Instance(); | static TransOpUtil &Instance(); | ||||
typedef std::unordered_map<std::string, int> transop_index_op; | |||||
typedef std::map<std::string, int> transop_index_op; | |||||
transop_index_op transop_index_map_; | transop_index_op transop_index_map_; | ||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() { | |||||
/// @param [in] outside_addrs: model input/output memory addr | /// @param [in] outside_addrs: model input/output memory addr | ||||
/// @return: 0 for success / others for failed | /// @return: 0 for success / others for failed | ||||
/// | /// | ||||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||||
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||||
if ((args_ != nullptr) || (args_size_ > 0)) { | if ((args_ != nullptr) || (args_size_ > 0)) { | ||||
GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | GELOGE(FAILED, "Task already initialized, size: %u", args_size_); | ||||
return FAILED; | return FAILED; | ||||
@@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v | |||||
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) | ||||
AddrMapInfo addr_map_info; | AddrMapInfo addr_map_info; | ||||
for (auto &addrs : outside_addrs) { | |||||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | |||||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | |||||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||||
} | |||||
} | |||||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||||
// init src_addrs/dst_addrs | // init src_addrs/dst_addrs | ||||
size_t index = 0; | |||||
vector<uint64_t> src_addrs; | vector<uint64_t> src_addrs; | ||||
vector<uint64_t> dst_addrs; | vector<uint64_t> dst_addrs; | ||||
for (auto &addrs : outside_addrs) { | |||||
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
for (const auto &addrs : outside_addrs) { | |||||
const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); | |||||
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); | ||||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; | ||||
for (const auto &virtual_args_addr : virtual_args_addrs) { | for (const auto &virtual_args_addr : virtual_args_addrs) { | ||||
addr_map_info.addr_num += virtual_args_addr.second.size(); | |||||
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | ||||
src_addrs.push_back(mbuf_list.at(index)); | |||||
src_addrs.emplace_back(mbuf_list.at(addrs.first)); | |||||
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | ||||
} | } | ||||
} | } | ||||
index++; | |||||
} | } | ||||
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num); | |||||
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs | ||||
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); | ||||
@@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo { | |||||
~CpuTaskZeroCopy() override; | ~CpuTaskZeroCopy() override; | ||||
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } | ||||
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||||
Status Distribute() override; | Status Distribute() override; | ||||
private: | private: | ||||
@@ -842,6 +842,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
}; | }; | ||||
vector<OpDescPtr> output_op_list; | vector<OpDescPtr> output_op_list; | ||||
set<const void *> input_outside_addrs; | |||||
set<const void *> output_outside_addrs; | |||||
map<uint32_t, OpDescPtr> data_by_index; | map<uint32_t, OpDescPtr> data_by_index; | ||||
map<string, OpDescPtr> variable_by_name; | map<string, OpDescPtr> variable_by_name; | ||||
auto nodes = compute_graph->GetAllNodes(); | auto nodes = compute_graph->GetAllNodes(); | ||||
@@ -858,7 +860,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | GE_TIMESTAMP_ADD(LoadTBEKernelBinToOpDesc); | ||||
if (IsDataOp(op_desc->GetType())) { | if (IsDataOp(op_desc->GetType())) { | ||||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index) != SUCCESS) { | |||||
if (InitDataOp(compute_graph, node, data_op_index, data_by_index, input_outside_addrs) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "Data init failed, Name: %s", op_desc->GetName().c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -867,7 +869,7 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { | |||||
} | } | ||||
if (op_desc->GetType() == NETOUTPUT) { | if (op_desc->GetType() == NETOUTPUT) { | ||||
if (InitNetOutput(compute_graph, node, output_op_list) != SUCCESS) { | |||||
if (InitNetOutput(compute_graph, node, output_op_list, output_outside_addrs) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "NetOutput init failed, Name: %s", op_desc->GetName().c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -961,7 +963,7 @@ void DavinciModel::SetLabelForDynamic(const NodePtr &node) { | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | ||||
map<uint32_t, OpDescPtr> &data_by_index) { | |||||
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs) { | |||||
// op_desc Checked by Init: Data, valid. | // op_desc Checked by Init: Data, valid. | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (node->GetOwnerComputeGraph() != graph) { | if (node->GetOwnerComputeGraph() != graph) { | ||||
@@ -1000,16 +1002,12 @@ Status DavinciModel::InitDataOp(const ComputeGraphPtr &graph, const NodePtr &nod | |||||
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
new_input_data_info_[data_index] = zero_copy_offset; | |||||
for (size_t index = 0; index < virtual_addr_list.size(); ++index) { | |||||
void *addr = virtual_addr_list.at(index); | |||||
if (new_input_outside_addrs_.find(addr) != new_input_outside_addrs_.end()) { | |||||
continue; | |||||
} | |||||
zero_copy_offset.SetInputOutsideAddrs(output_offset_list, addr, index, fusion_flag, real_virtual_addrs_); | |||||
new_input_outside_addrs_[addr] = zero_copy_offset; | |||||
if (input_outside_addrs.count(virtual_addr) == 0) { | |||||
int64_t output_offset = output_offset_list.at(kDataIndex); | |||||
zero_copy_offset.SetInputOutsideAddrs(output_offset, virtual_addr, fusion_flag, real_virtual_addrs_); | |||||
input_outside_addrs.insert(virtual_addr); | |||||
} | } | ||||
input_data_info_[data_index] = zero_copy_offset; | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1085,7 +1083,7 @@ bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | |||||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | ||||
/// @return Status | /// @return Status | ||||
Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, | ||||
vector<OpDescPtr> &output_op_list) { | |||||
vector<OpDescPtr> &output_op_list, set<const void *> &output_outside_addrs) { | |||||
// node->GetOpDesc Checked by Init: NetOutput, valid. | // node->GetOpDesc Checked by Init: NetOutput, valid. | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
// excludes the function op sub graph, e.g. case,if | // excludes the function op sub graph, e.g. case,if | ||||
@@ -1117,7 +1115,7 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
size_t num = new_output_data_info_.size(); | |||||
size_t num = output_data_info_.size(); | |||||
bool fusion_flag = false; | bool fusion_flag = false; | ||||
size_t input_count = input_size_list.size(); | size_t input_count = input_size_list.size(); | ||||
@@ -1131,22 +1129,22 @@ Status DavinciModel::InitNetOutput(const ComputeGraphPtr &graph, const NodePtr & | |||||
Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); | Status ret = zero_copy_offset.InitOutputDataInfo(input_size_list, virtual_addr_list, op_desc, idx, fusion_flag); | ||||
GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", | GE_IF_BOOL_EXEC(ret != SUCCESS, GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", | ||||
op_desc->GetName().c_str()); return PARAM_INVALID;); | op_desc->GetName().c_str()); return PARAM_INVALID;); | ||||
new_output_data_info_[num + idx] = zero_copy_offset; | |||||
void *addr = virtual_addr_list.at(idx); | void *addr = virtual_addr_list.at(idx); | ||||
int64_t input_offset = input_offset_list.at(idx); | int64_t input_offset = input_offset_list.at(idx); | ||||
vector<void *> tensor_addrs; | |||||
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||||
auto rslt = new_output_outside_addrs_.insert(std::pair<void *, ZeroCopyOffset>(addr, zero_copy_offset)); | |||||
if (!rslt.second) { | |||||
if (output_outside_addrs.count(addr) == 0) { | |||||
vector<void *> tensor_addrs; | |||||
zero_copy_offset.SetOutputOutsideAddrs(input_offset, fusion_flag, addr, tensor_addrs); | |||||
output_outside_addrs.insert(addr); | |||||
for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||||
void *real_addr = tensor_addrs.at(i); | |||||
DisableZeroCopy(real_addr); | |||||
real_virtual_addrs_.insert(real_addr); | |||||
} | |||||
} else { | |||||
GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); | GELOGI("same output_tensor_addr %p to different input_tensor of %s", addr, op_desc->GetName().c_str()); | ||||
DisableZeroCopy(addr); | DisableZeroCopy(addr); | ||||
} | } | ||||
for (size_t i = 0; i < tensor_addrs.size(); ++i) { | |||||
void *real_addr = tensor_addrs.at(i); | |||||
DisableZeroCopy(real_addr); | |||||
real_virtual_addrs_.insert(real_addr); | |||||
} | |||||
output_data_info_[num + idx] = zero_copy_offset; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1402,7 +1400,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||||
} | } | ||||
rtLabel_t rt_label = nullptr; | rtLabel_t rt_label = nullptr; | ||||
rtError_t rt_error = rtLabelCreateEx(&rt_label, stream); | |||||
rtError_t rt_error = rtLabelCreateExV2(&rt_label, rt_model_handle_, stream); | |||||
if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { | if (rt_error != RT_ERROR_NONE || rt_label == nullptr) { | ||||
GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); | GELOGE(INTERNAL_ERROR, "InitLabelSet: %s create label failed, error=0x%x.", op_desc->GetName().c_str(), rt_error); | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
@@ -1463,27 +1461,27 @@ Status DavinciModel::LoadWithQueue() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if (input_queue_ids_.size() != new_input_data_info_.size()) { | |||||
if (input_queue_ids_.size() != input_data_info_.size()) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", | GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, "Input queue ids not match model: input_queue=%zu input_data=%zu", | ||||
input_queue_ids_.size(), new_input_data_info_.size()); | |||||
input_queue_ids_.size(), input_data_info_.size()); | |||||
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | ||||
} | } | ||||
if (output_queue_ids_.size() != new_output_data_info_.size()) { | |||||
if (output_queue_ids_.size() != output_data_info_.size()) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, | GELOGE(ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID, | ||||
"Output queue ids not match model: output_queue=%zu output_data=%zu", | "Output queue ids not match model: output_queue=%zu output_data=%zu", | ||||
output_queue_ids_.size(), new_output_data_info_.size()); | |||||
output_queue_ids_.size(), output_data_info_.size()); | |||||
return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | return ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID; | ||||
} | } | ||||
GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); | GE_CHK_STATUS_RET(AddHeadStream(), "Add head stream failed."); | ||||
// Binding input_queue and Data Op. | // Binding input_queue and Data Op. | ||||
GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); | GE_CHK_STATUS_RET(BindInputQueue(), "Launch bind input queue failed."); | ||||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, new_input_outside_addrs_), "Launch zero copy failed."); | |||||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(input_mbuf_list_, input_data_info_), "Launch zero copy failed."); | |||||
// Binding output_queue and NetOutput Op. | // Binding output_queue and NetOutput Op. | ||||
GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); | GE_CHK_STATUS_RET(BindOutputQueue(), "Launch bind output queue failed."); | ||||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, new_output_outside_addrs_), "Launch zero copy failed."); | |||||
GE_CHK_STATUS_RET(CpuTaskModelZeroCopy(output_mbuf_list_, output_data_info_), "Launch zero copy failed."); | |||||
GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); | GE_CHK_STATUS_RET(CpuActiveStream(), "Launch active entry stream failed."); | ||||
GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); | GE_CHK_STATUS_RET(CpuWaitEndGraph(), "Launch wait end graph failed."); | ||||
@@ -1499,9 +1497,9 @@ Status DavinciModel::LoadWithQueue() { | |||||
Status DavinciModel::BindInputQueue() { | Status DavinciModel::BindInputQueue() { | ||||
// Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | ||||
for (size_t i = 0; i < input_queue_ids_.size(); ++i) { | for (size_t i = 0; i < input_queue_ids_.size(); ++i) { | ||||
auto it = new_input_data_info_.find(i); | |||||
if (it == new_input_data_info_.end()) { | |||||
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", new_input_data_info_.size(), i); | |||||
auto it = input_data_info_.find(i); | |||||
if (it == input_data_info_.end()) { | |||||
GELOGE(FAILED, "Input not match: tensor num=%zu, Queue id index=%zu", input_data_info_.size(), i); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -1555,7 +1553,7 @@ Status DavinciModel::CpuModelDequeue(uint32_t queue_id) { | |||||
} | } | ||||
Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | ||||
std::map<const void *, ZeroCopyOffset> &outside_addrs) { | |||||
const map<uint32_t, ZeroCopyOffset> &outside_addrs) { | |||||
GELOGI("Set CpuKernel model zero_copy task enter."); | GELOGI("Set CpuKernel model zero_copy task enter."); | ||||
std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_); | std::shared_ptr<CpuTaskZeroCopy> zero_copy = MakeShared<CpuTaskZeroCopy>(rt_entry_stream_); | ||||
if (zero_copy == nullptr) { | if (zero_copy == nullptr) { | ||||
@@ -1579,9 +1577,9 @@ Status DavinciModel::CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, | |||||
Status DavinciModel::BindOutputQueue() { | Status DavinciModel::BindOutputQueue() { | ||||
// Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | // Caller checked: input_queue_ids_.size() == input_size_list_.size() != input_addr_list_.size() | ||||
for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | ||||
auto it = new_output_data_info_.find(i); | |||||
if (it == new_output_data_info_.end()) { | |||||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||||
auto it = output_data_info_.find(i); | |||||
if (it == output_data_info_.end()) { | |||||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -1685,9 +1683,9 @@ Status DavinciModel::CpuWaitEndGraph() { | |||||
Status DavinciModel::BindEnqueue() { | Status DavinciModel::BindEnqueue() { | ||||
for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | for (size_t i = 0; i < output_queue_ids_.size(); ++i) { | ||||
auto it = new_output_data_info_.find(i); | |||||
if (it == new_output_data_info_.end()) { | |||||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", new_output_data_info_.size(), i); | |||||
auto it = output_data_info_.find(i); | |||||
if (it == output_data_info_.end()) { | |||||
GELOGE(FAILED, "Output not match: tensor num=%zu, Queue id index=%zu", output_data_info_.size(), i); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -2103,10 +2101,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_descs | |||||
Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data) { | ||||
rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | rtMemcpyKind_t kind = device_data ? RT_MEMCPY_DEVICE_TO_DEVICE : RT_MEMCPY_HOST_TO_DEVICE; | ||||
const std::vector<DataBuffer> &blobs = input_data.blobs; | const std::vector<DataBuffer> &blobs = input_data.blobs; | ||||
for (const auto &data : new_input_data_info_) { | |||||
for (const auto &data : input_data_info_) { | |||||
if (data.first >= blobs.size()) { | if (data.first >= blobs.size()) { | ||||
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), | GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld, op_name(%s)", blobs.size(), | ||||
new_input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||||
input_data_info_.size(), data.first, data.second.GetDataInfo().at(0).first, | |||||
data.second.GetOpName().c_str()); | data.second.GetOpName().c_str()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -2427,18 +2425,18 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
output_data.index = data_id; | output_data.index = data_id; | ||||
output_data.model_id = model_id_; | output_data.model_id = model_id_; | ||||
if (output_data.blobs.size() != new_output_data_info_.size()) { | |||||
if (output_data.blobs.size() != output_data_info_.size()) { | |||||
GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), | GELOGE(FAILED, "Output data buffer num=%zu not equal model data num=%zu", output_data.blobs.size(), | ||||
new_output_data_info_.size()); | |||||
output_data_info_.size()); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
std::vector<DataBuffer> &blobs = output_data.blobs; | std::vector<DataBuffer> &blobs = output_data.blobs; | ||||
size_t idx = 0; | size_t idx = 0; | ||||
for (const auto &output : new_output_data_info_) { | |||||
for (const auto &output : output_data_info_) { | |||||
if (output.first >= blobs.size()) { | if (output.first >= blobs.size()) { | ||||
GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), | GELOGE(FAILED, "Blobs not match: blobs=%zu, tensor=%zu, index=%u, size=%ld", blobs.size(), | ||||
new_input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||||
input_data_info_.size(), output.first, output.second.GetDataInfo().at(0).first); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
@@ -3166,8 +3164,11 @@ void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) { | |||||
/// @return None. | /// @return None. | ||||
/// | /// | ||||
void DavinciModel::SetCopyOnlyOutput() { | void DavinciModel::SetCopyOnlyOutput() { | ||||
for (const auto &output_outside_addrs : new_output_outside_addrs_) { | |||||
for (const auto &output_outside_addrs : output_data_info_) { | |||||
ZeroCopyOffset output_outside = output_outside_addrs.second; | ZeroCopyOffset output_outside = output_outside_addrs.second; | ||||
if (!output_outside.IsRelativeOffsetValid()) { | |||||
return; | |||||
} | |||||
for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { | for (uint32_t out_count = 0; out_count < output_outside.GetAddrCount(); ++out_count) { | ||||
auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); | auto &addrs_mapping_list = output_outside.GetOutsideAddrs(); | ||||
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count]; | std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[out_count]; | ||||
@@ -3219,12 +3220,12 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||||
for (size_t i = 0; i < nums; ++i) { | for (size_t i = 0; i < nums; ++i) { | ||||
std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | ||||
for (auto &input_outside_addrs : new_input_outside_addrs_) { | |||||
for (auto &input_outside_addrs : input_data_info_) { | |||||
ZeroCopyOffset &input_outside = input_outside_addrs.second; | ZeroCopyOffset &input_outside = input_outside_addrs.second; | ||||
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | ||||
} | } | ||||
for (auto &output_outside_addrs : new_output_outside_addrs_) { | |||||
for (auto &output_outside_addrs : output_data_info_) { | |||||
ZeroCopyOffset &output_outside = output_outside_addrs.second; | ZeroCopyOffset &output_outside = output_outside_addrs.second; | ||||
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | ||||
} | } | ||||
@@ -3293,12 +3294,12 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||||
/// @return SUCCESS handle successfully / PARAM_INVALID for failed | /// @return SUCCESS handle successfully / PARAM_INVALID for failed | ||||
/// | /// | ||||
Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { | Status DavinciModel::CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic) { | ||||
if (UpdateIoTaskArgs(new_input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||||
if (UpdateIoTaskArgs(input_data_info_, true, input_data.blobs, is_dynamic, input_data.batch_label) != SUCCESS) { | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update input data to model failed."); | ||||
return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
} | } | ||||
if (UpdateIoTaskArgs(new_output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||||
if (UpdateIoTaskArgs(output_data_info_, false, output_data.blobs, is_dynamic, input_data.batch_label) != | |||||
SUCCESS) { | SUCCESS) { | ||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); | GELOGE(ACL_ERROR_GE_PARAM_INVALID, "[ZCPY] Update output data to model failed."); | ||||
return ACL_ERROR_GE_PARAM_INVALID; | return ACL_ERROR_GE_PARAM_INVALID; | ||||
@@ -675,7 +675,7 @@ class DavinciModel { | |||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, | ||||
map<uint32_t, OpDescPtr> &data_by_index); | |||||
map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -694,7 +694,8 @@ class DavinciModel { | |||||
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. | ||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); | |||||
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list, | |||||
set<const void *> &output_outside_addrs); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -764,7 +765,7 @@ class DavinciModel { | |||||
/// | /// | ||||
Status BindInputQueue(); | Status BindInputQueue(); | ||||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); | |||||
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -897,10 +898,8 @@ class DavinciModel { | |||||
void *global_step_addr_{nullptr}; | void *global_step_addr_{nullptr}; | ||||
uint64_t global_step_size_{0}; | uint64_t global_step_size_{0}; | ||||
map<uint32_t, ZeroCopyOffset> new_input_data_info_; | |||||
map<uint32_t, ZeroCopyOffset> new_output_data_info_; | |||||
map<const void *, ZeroCopyOffset> new_input_outside_addrs_; | |||||
map<const void *, ZeroCopyOffset> new_output_outside_addrs_; | |||||
map<uint32_t, ZeroCopyOffset> input_data_info_; | |||||
map<uint32_t, ZeroCopyOffset> output_data_info_; | |||||
set<const void *> real_virtual_addrs_; | set<const void *> real_virtual_addrs_; | ||||
@@ -100,8 +100,8 @@ class TsMemMall { | |||||
private: | private: | ||||
std::mutex mem_mutex_; | std::mutex mem_mutex_; | ||||
std::unordered_map<int64_t, void *> mem_store_size_; | |||||
std::unordered_map<void *, int64_t> mem_store_addr_; | |||||
std::map<int64_t, void *> mem_store_size_; | |||||
std::map<void *, int64_t> mem_store_addr_; | |||||
rtMemType_t mem_type_; | rtMemType_t mem_type_; | ||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const | |||||
} | } | ||||
} | } | ||||
void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||||
bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | |||||
void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, | |||||
set<const void *> &real_virtual_addrs) { | |||||
uint32_t out_count = 0; | uint32_t out_count = 0; | ||||
if (!fusion_flag) { | if (!fusion_flag) { | ||||
out_count++; | out_count++; | ||||
@@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||||
real_virtual_addrs.insert(addr); | real_virtual_addrs.insert(addr); | ||||
} else { | } else { | ||||
GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); | GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); | ||||
int64_t output_offset = output_offset_list.at(index); | |||||
for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { | for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { | ||||
if (zero_copy_basic_offset_.at(i) == output_offset) { | if (zero_copy_basic_offset_.at(i) == output_offset) { | ||||
out_count++; | out_count++; | ||||
@@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l | |||||
} | } | ||||
} | } | ||||
addr_count_ = out_count; | addr_count_ = out_count; | ||||
valid_relative_offset_ = true; | |||||
} | } | ||||
void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | ||||
@@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||||
} | } | ||||
} | } | ||||
addr_count_ = out_count; | addr_count_ = out_count; | ||||
valid_relative_offset_ = true; | |||||
} | } | ||||
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | ||||
if (!valid_relative_offset_) { | |||||
return; | |||||
} | |||||
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | ||||
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | ||||
auto args_addrs = outside_addrs_[out_count].find(outside_addr); | auto args_addrs = outside_addrs_[out_count].find(outside_addr); | ||||
@@ -43,8 +43,7 @@ class ZeroCopyOffset { | |||||
~ZeroCopyOffset(); | ~ZeroCopyOffset(); | ||||
Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | ||||
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||||
bool fusion_flag, std::set<const void *> &real_virtual_addrs); | |||||
void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs); | |||||
void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); | void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); | ||||
Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, | Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, | ||||
@@ -65,9 +64,10 @@ class ZeroCopyOffset { | |||||
// data_size of Data/Netoutput | // data_size of Data/Netoutput | ||||
int64_t GetDataSize() const { return data_size_; } | int64_t GetDataSize() const { return data_size_; } | ||||
// value of *outside_addrs_ from davinci_model | // value of *outside_addrs_ from davinci_model | ||||
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } | |||||
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; } | |||||
// name of op | // name of op | ||||
std::string GetOpName() const { return op_name_; } | std::string GetOpName() const { return op_name_; } | ||||
const bool IsRelativeOffsetValid() const { return valid_relative_offset_; } | |||||
private: | private: | ||||
void *basic_addr_ = nullptr; | void *basic_addr_ = nullptr; | ||||
@@ -81,6 +81,7 @@ class ZeroCopyOffset { | |||||
std::vector<int64_t> zero_copy_basic_offset_; | std::vector<int64_t> zero_copy_basic_offset_; | ||||
std::vector<int64_t> zero_copy_relative_offset_; | std::vector<int64_t> zero_copy_relative_offset_; | ||||
bool valid_relative_offset_ = false; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ |
@@ -131,7 +131,7 @@ bool IsTailingOptimization() { | |||||
} | } | ||||
ge::Status CheckFpCeilingMode() { | ge::Status CheckFpCeilingMode() { | ||||
static const std::unordered_set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||||
static const std::set<std::string> kValidFpCeilingMode = {"0", "1", "2"}; | |||||
string mode; | string mode; | ||||
auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); | auto ret = ge::GetContext().GetOption("ge.fpCeilingMode", mode); | ||||
if (ret == ge::GRAPH_SUCCESS) { | if (ret == ge::GRAPH_SUCCESS) { | ||||
@@ -170,8 +170,8 @@ class VarResource { | |||||
std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | std::unordered_map<std::string, VarAddrMgr> var_addr_mgr_map_; | ||||
std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | std::unordered_map<std::string, ge::GeTensorDesc> cur_var_tensor_desc_map_; | ||||
std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | std::unordered_map<std::string, std::vector<TransNodeInfo>> var_to_trans_road_; | ||||
std::unordered_map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||||
std::unordered_map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||||
std::map<std::string, uint32_t> var_names_to_changed_graph_id_; | |||||
std::map<std::string, uint32_t> var_names_to_allocated_graph_id_; | |||||
std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_; | std::map<uint32_t, std::unordered_map<std::string, VarBroadCastInfo>> var_broad_cast_info_; | ||||
}; | }; | ||||
@@ -843,7 +843,7 @@ bool ge::GraphPartitioner::HasSecondPath(size_t src, size_t dst, size_t upper_bo | |||||
/// Avoid recursion since stack space might be limited. | /// Avoid recursion since stack space might be limited. | ||||
/// We instead keep a stack of nodes to visit. | /// We instead keep a stack of nodes to visit. | ||||
std::vector<size_t> temp_stack; | std::vector<size_t> temp_stack; | ||||
std::unordered_set<size_t> visited; | |||||
std::set<size_t> visited; | |||||
temp_stack.push_back(src); | temp_stack.push_back(src); | ||||
while (!temp_stack.empty()) { | while (!temp_stack.empty()) { | ||||
size_t cluster = temp_stack.back(); | size_t cluster = temp_stack.back(); | ||||
@@ -36,7 +36,7 @@ using PartitionMap = std::unordered_map<ComputeGraphPtr, std::string>; | |||||
using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>; | using NodetoNodeMap = std::unordered_map<NodePtr, NodePtr>; | ||||
using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>; | using EnginetoGraphMap = std::unordered_map<std::string, ComputeGraphPtr>; | ||||
using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>; | using EdgeMap = std::set<std::pair<AnchorPtr, AnchorPtr>>; | ||||
using ClusterSet = std::unordered_set<size_t>; | |||||
using ClusterSet = std::set<size_t>; | |||||
class Cluster { | class Cluster { | ||||
public: | public: | ||||
size_t index_; // corresponding to rank of node | size_t index_; // corresponding to rank of node | ||||
@@ -50,12 +50,12 @@ Status RunOpKernelWithCheck(NodePtr &node, | |||||
return FoldingPass::RunOpKernel(node, inputs, outputs); | return FoldingPass::RunOpKernel(node, inputs, outputs); | ||||
} | } | ||||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
&ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | &ConstantFoldingPass::GetGeConstantFoldingPerfStatistic() const { | ||||
return statistic_of_ge_constant_folding_; | return statistic_of_ge_constant_folding_; | ||||
} | } | ||||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> | |||||
&ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | &ConstantFoldingPass::GetOpConstantFoldingPerfStatistic() const { | ||||
return statistic_of_op_constant_folding_; | return statistic_of_op_constant_folding_; | ||||
} | } | ||||
@@ -26,11 +26,11 @@ namespace ge { | |||||
class ConstantFoldingPass : public FoldingPass { | class ConstantFoldingPass : public FoldingPass { | ||||
public: | public: | ||||
Status Run(ge::NodePtr &node) override; | Status Run(ge::NodePtr &node) override; | ||||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||||
const std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetGeConstantFoldingPerfStatistic() const; | |||||
const std::map<std::string, std::pair<std::uint64_t, uint64_t>> &GetOpConstantFoldingPerfStatistic() const; | |||||
private: | private: | ||||
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||||
std::unordered_map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||||
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_op_constant_folding_; | |||||
std::map<std::string, std::pair<std::uint64_t, uint64_t>> statistic_of_ge_constant_folding_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -372,6 +372,11 @@ NodePtr HcclContinuousMemcpyPass::CreateAssignNode(const ComputeGraphPtr &graph, | |||||
} | } | ||||
GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | GELOGI("Create Assign op:%s.", op_desc->GetName().c_str()); | ||||
if (!AttrUtils::SetBool(op_desc, ATTR_NEED_COMPILE, true)) { | |||||
GELOGE(INTERNAL_ERROR, "Set ATTR_NEED_COMPILE Attr for node:%s fail.", op_desc->GetName().c_str()); | |||||
return nullptr; | |||||
} | |||||
graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | graphStatus ret = op_desc->AddInputDesc("ref", pre_op_desc->GetOutputDesc(out_data_anchor->GetIdx())); | ||||
if (ret != GRAPH_SUCCESS) { | if (ret != GRAPH_SUCCESS) { | ||||
GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | GELOGE(INTERNAL_ERROR, "Create Assign op: add ref input desc fail."); | ||||
@@ -52,7 +52,7 @@ class HcclContinuousMemcpyPass : public GraphPass { | |||||
bool IsDataNode(const std::string& node_type); | bool IsDataNode(const std::string& node_type); | ||||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
std::map<std::string, uint32_t> node_num_map_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -50,7 +50,7 @@ class HcclMemcpyPass : public GraphPass { | |||||
bool IsDataNode(const std::string& node_type); | bool IsDataNode(const std::string& node_type); | ||||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
std::map<std::string, uint32_t> node_num_map_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
@@ -92,8 +92,7 @@ Status MultiBatchClonePass::Run(ComputeGraphPtr graph) { | |||||
} | } | ||||
// parser data dynamic info from atc parameter --input_shape | // parser data dynamic info from atc parameter --input_shape | ||||
if (multibatch::ParserDataToDynmaicInfo(batch_shapes_, GetLocalOmgContext().user_input_dims, | |||||
data_to_dynamic_info_) != SUCCESS) { | |||||
if (CheckAndParseDynamicData() != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); | GELOGE(PARAM_INVALID, "Parse each data's own dynamic info failed"); | ||||
return PARAM_INVALID; | return PARAM_INVALID; | ||||
} | } | ||||
@@ -177,6 +176,58 @@ Status MultiBatchClonePass::CollectIoNodes(const ComputeGraphPtr &graph) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
///
/// @ingroup ge
/// @brief Check the unknown-shape Data nodes against the configured dynamic option and build
///        data_to_dynamic_info_ from the user input dims plus any Data nodes missing from them.
/// @return SUCCESS when parsing succeeds and (unless getnext_sink_dynamic_dims_ is set) at least one
///         Data node has an unknown dim; PARAM_INVALID / INTERNAL_ERROR on the failures logged below.
///
Status MultiBatchClonePass::CheckAndParseDynamicData() { | |||||
size_t unknown_shape_count = 0; | |||||
// Local copy (plain `auto`): entries are appended below without touching the context's own list.
auto data_name_and_shape = GetLocalOmgContext().user_input_dims; | |||||
// Names the user supplied, in their original order; used only for the membership test below.
std::vector<std::string> data_name_order; | |||||
for (auto &item : data_name_and_shape) { | |||||
data_name_order.push_back(item.first); | |||||
} | |||||
// The per-node checks are skipped entirely when getnext_sink_dynamic_dims_ is set.
if (!getnext_sink_dynamic_dims_) { | |||||
for (const auto &node : all_data_nodes_) { | |||||
auto data_desc = NodeUtils::GetOutputDesc(*node, kDataOutIndex); | |||||
auto data_shape = data_desc.GetShape(); | |||||
// NOTE(review): data_format is only consumed by the dynamic_image_size branch, yet it is computed
// for every node, including those skipped by the `continue` below.
auto data_format = data_desc.GetFormat() == Format::FORMAT_NCHW ? "NCHW" : | |||||
data_desc.GetFormat() == Format::FORMAT_NHWC ? "NHWC" : "Others"; | |||||
auto data_name = node->GetName(); | |||||
const auto &data_shape_dims = data_shape.GetDims(); | |||||
// Every dim >= 0 means the shape is fully known: nothing to check for this node.
if (std::all_of(data_shape_dims.begin(), data_shape_dims.end(), [](int64_t val) { return val >= 0; })) { | |||||
continue; | |||||
} | |||||
++unknown_shape_count; | |||||
auto iter = find(data_name_order.begin(), data_name_order.end(), data_name); | |||||
// Only nodes the user did not list are validated here and appended to the local list.
if (iter == data_name_order.end()) { | |||||
if (!GetLocalOmgContext().dynamic_batch_size.empty()) { | |||||
auto ret = multibatch::CheckDynamicBatchShape(data_shape_dims, data_name); | |||||
GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic batch shape of %s.", | |||||
data_name.c_str()); return PARAM_INVALID); | |||||
} else if (!GetLocalOmgContext().dynamic_image_size.empty()) { | |||||
auto ret = multibatch::CheckDynamicImageSizeShape(data_shape_dims, data_name, data_format); | |||||
GE_IF_BOOL_EXEC(ret == false, GELOGE(PARAM_INVALID, "Failed to check dynamic image size shape of %s.", | |||||
data_name.c_str()); return PARAM_INVALID); | |||||
} else if (!GetLocalOmgContext().dynamic_dims.empty()) { | |||||
// With dynamic_dims set, an unknown-shape Data node that the user did not list is an error.
ErrorManager::GetInstance().ATCReportErrMessage("E10001", {"parameter", "reason"}, | |||||
{"--input_shape", "all dynamic data must be set in --input_shape"}); | |||||
// NOTE(review): the log text below says "set int --input_shape"; looks like a typo for "set in".
GELOGE(INTERNAL_ERROR, "data: %s shape:%s must be set int --input_shape", | |||||
node->GetName().c_str(), data_shape.ToString().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
// Reached when a check above passed or when none of the three options is set.
// NOTE(review): data_name_order is not updated here, so a second node with the same name would be
// appended again -- confirm node names are unique.
data_name_and_shape.emplace_back(data_name, data_shape_dims); | |||||
} | |||||
} | |||||
} | |||||
auto ret = multibatch::ParserDataToDynamicInfo(batch_shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
// GE_CHK_STATUS_RET is defined elsewhere; presumably it logs and returns `ret` when it is not SUCCESS.
GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | |||||
// This check runs only after parsing: a dynamic option without any unknown-shape Data node is rejected.
if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | |||||
// NOTE(review): "unknow" in the message below looks like a typo for "unknown".
GELOGE(PARAM_INVALID, | |||||
"Need unknow shape data when user set --dynamic_batch_size, --dynamic_image_size or --dynamic_dims"); | |||||
return PARAM_INVALID; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | Status MultiBatchClonePass::InitParamsOfGetNext(const NodePtr &node) { | ||||
data_count_from_getnext_ = 0; | data_count_from_getnext_ = 0; | ||||
getnext_sink_dynamic_dims_ = false; | getnext_sink_dynamic_dims_ = false; | ||||
@@ -175,6 +175,8 @@ class MultiBatchClonePass : public GraphPass { | |||||
/// @return 0: SUCCESS / others: FAILED | /// @return 0: SUCCESS / others: FAILED | ||||
/// | /// | ||||
Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); | Status UpdateOutputTensor(uint32_t parent_index, uint32_t unused_num); | ||||
Status CheckAndParseDynamicData(); | |||||
std::string session_graph_id_; | std::string session_graph_id_; | ||||
std::vector<std::vector<int64_t>> batch_shapes_; | std::vector<std::vector<int64_t>> batch_shapes_; | ||||
@@ -235,7 +235,7 @@ class SwitchToStreamSwitchPass : public GraphPass { | |||||
std::vector<NodePtr> stream_switch_nodes_; | std::vector<NodePtr> stream_switch_nodes_; | ||||
std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_; | std::unordered_map<OutDataAnchorPtr, std::map<int64_t, std::vector<std::list<NodePtr>>>> cond_node_map_; | ||||
std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_; | std::unordered_map<NodePtr, std::set<std::string>> switch_node_map_; | ||||
std::unordered_map<std::string, uint32_t> node_num_map_; | |||||
std::map<std::string, uint32_t> node_num_map_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ | #endif // GE_GRAPH_PASSES_SWITCH_TO_STREAM_SWITCH_PASS_H_ |
@@ -738,7 +738,7 @@ Status MultiBatchGraphCopyer::CheckAndParseDynamicData(){ | |||||
} | } | ||||
} | } | ||||
} | } | ||||
auto ret = ParserDataToDynmaicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
auto ret = ParserDataToDynamicInfo(shapes_, data_name_and_shape, data_to_dynamic_info_); | |||||
GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | GE_CHK_STATUS_RET(ret, "Failed to parse data to dynamic info."); | ||||
if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | if (!getnext_sink_dynamic_dims_ && unknown_shape_count == 0) { | ||||
ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | ErrorManager::GetInstance().ATCReportErrMessage("E10040"); | ||||
@@ -377,7 +377,7 @@ bool InitDynamicParams(vector<vector<int64_t>> &shapes) { | |||||
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | ||||
/// @return true: Configed for Multi batch / false: Not configed for Multi batch. | /// @return true: Configed for Multi batch / false: Not configed for Multi batch. | ||||
/// | /// | ||||
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||||
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||||
vector<pair<string, vector<int64_t>>> &data_name_and_shape, | vector<pair<string, vector<int64_t>>> &data_name_and_shape, | ||||
map<string, vector<vector<int64_t>> > &data_to_dynamic_info) { | map<string, vector<vector<int64_t>> > &data_to_dynamic_info) { | ||||
size_t cur_data_index = 0; | size_t cur_data_index = 0; | ||||
@@ -74,7 +74,7 @@ Status CalcShape(const std::vector<int64_t> &batch_shape, GeShape &data_shape); | |||||
/// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | /// @param [out] map<string, vector<vector<int64_t>>> &data_to_dynamic_info: key:data_name. value:dynamic dims. | ||||
/// @return SUCCESS / PARAM_INVALID | /// @return SUCCESS / PARAM_INVALID | ||||
/// | /// | ||||
Status ParserDataToDynmaicInfo(const vector<vector<int64_t>> &shapes, | |||||
Status ParserDataToDynamicInfo(const vector<vector<int64_t>> &shapes, | |||||
vector<pair<string, vector<int64_t>>> &data_name_and_shape, | vector<pair<string, vector<int64_t>>> &data_name_and_shape, | ||||
map<string, vector<vector<int64_t>>> &data_to_dynamic_info); | map<string, vector<vector<int64_t>>> &data_to_dynamic_info); | ||||
@@ -93,7 +93,7 @@ Status StampDynamicType(const OpDescPtr &op_desc); | |||||
/// @param [in] const string &data_name: cur data name. | /// @param [in] const string &data_name: cur data name. | ||||
/// @return 0: true/false | /// @return 0: true/false | ||||
/// | /// | ||||
bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||||
GE_FUNC_VISIBILITY bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_name); | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
@@ -104,7 +104,7 @@ bool CheckDynamicBatchShape(const vector<int64_t> &shape, const string &data_nam | |||||
/// @param [in] const std::string &input_format: format of input. | /// @param [in] const std::string &input_format: format of input. | ||||
/// @return 0: true/false | /// @return 0: true/false | ||||
/// | /// | ||||
bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||||
GE_FUNC_VISIBILITY bool CheckDynamicImageSizeShape(const vector<int64_t> &shape, const string &data_name, | |||||
const std::string &input_format); | const std::string &input_format); | ||||
} // namespace multibatch | } // namespace multibatch | ||||
@@ -21,10 +21,12 @@ add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
target_compile_options(host_cpu_engine PRIVATE | target_compile_options(host_cpu_engine PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(host_cpu_engine PRIVATE | target_compile_definitions(host_cpu_engine PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(host_cpu_engine PRIVATE | target_include_directories(host_cpu_engine PRIVATE | ||||
@@ -44,6 +46,10 @@ target_include_directories(host_cpu_engine PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
# Pass -Bsymbolic to the linker for host_cpu_engine (PRIVATE: not propagated to dependents).
# NOTE(review): per the GNU ld manual, -Bsymbolic binds the library's references to global symbols
# to its own definitions; presumably paired with -fvisibility=hidden above -- confirm intent.
target_link_options(host_cpu_engine PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(host_cpu_engine PRIVATE | target_link_libraries(host_cpu_engine PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -60,11 +66,12 @@ add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS}) | |||||
target_compile_options(atc_host_cpu_engine PRIVATE | target_compile_options(atc_host_cpu_engine PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(atc_host_cpu_engine PRIVATE | target_compile_definitions(atc_host_cpu_engine PRIVATE | ||||
COMPILE_OMG_PACKAGE | |||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(atc_host_cpu_engine PRIVATE | target_include_directories(atc_host_cpu_engine PRIVATE | ||||
@@ -84,6 +91,10 @@ target_include_directories(atc_host_cpu_engine PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
# Pass -Bsymbolic to the linker for atc_host_cpu_engine (PRIVATE: not propagated to dependents).
# NOTE(review): per the GNU ld manual, -Bsymbolic binds the library's references to global symbols
# to its own definitions; presumably paired with -fvisibility=hidden above -- confirm intent.
target_link_options(atc_host_cpu_engine PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(atc_host_cpu_engine PRIVATE | target_link_libraries(atc_host_cpu_engine PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -105,10 +116,12 @@ add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
target_compile_options(host_cpu_opskernel_builder PRIVATE | target_compile_options(host_cpu_opskernel_builder PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(host_cpu_opskernel_builder PRIVATE | target_compile_definitions(host_cpu_opskernel_builder PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(host_cpu_opskernel_builder PRIVATE | target_include_directories(host_cpu_opskernel_builder PRIVATE | ||||
@@ -128,6 +141,10 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
# Pass -Bsymbolic to the linker for host_cpu_opskernel_builder (PRIVATE: not propagated to dependents).
# NOTE(review): per the GNU ld manual, -Bsymbolic binds the library's references to global symbols
# to its own definitions; presumably paired with -fvisibility=hidden above -- confirm intent.
target_link_options(host_cpu_opskernel_builder PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(host_cpu_opskernel_builder PRIVATE | target_link_libraries(host_cpu_opskernel_builder PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -145,10 +162,12 @@ add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST}) | |||||
target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | target_compile_options(atc_host_cpu_opskernel_builder PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | ||||
@@ -168,6 +187,10 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
# Pass -Bsymbolic to the linker for atc_host_cpu_opskernel_builder (PRIVATE: not propagated to dependents).
# NOTE(review): per the GNU ld manual, -Bsymbolic binds the library's references to global symbols
# to its own definitions; presumably paired with -fvisibility=hidden above -- confirm intent.
target_link_options(atc_host_cpu_opskernel_builder PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -190,11 +213,13 @@ add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST}) | |||||
target_compile_options(host_cpu_opskernel_builder_static PRIVATE | target_compile_options(host_cpu_opskernel_builder_static PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE | ||||
google=ascend_private | google=ascend_private | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(host_cpu_opskernel_builder_static PRIVATE | target_include_directories(host_cpu_opskernel_builder_static PRIVATE | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #ifndef GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
#define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #define GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | ||||
// GE_FUNC_VISIBILITY: export marker for symbols of this library.
// When FUNC_VISIBILITY is defined it expands to a compiler-specific export attribute
// (dllexport under MSVC, default ELF visibility otherwise); when it is not defined the macro is empty.
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
// NOTE(review): MSVC documents the double-underscore spelling `__declspec`; the single-underscore
// form used here is a compatibility synonym that is unavailable under /Za -- confirm this is intended.
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
// Non-MSVC compilers: mark the symbol with default visibility.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <memory> | #include <memory> | ||||
#include <string> | #include <string> | ||||
@@ -32,7 +46,7 @@ namespace host_cpu { | |||||
* host cpu engine. | * host cpu engine. | ||||
* Used for the ops which executes on host. | * Used for the ops which executes on host. | ||||
*/ | */ | ||||
class HostCpuEngine { | |||||
class GE_FUNC_VISIBILITY HostCpuEngine { | |||||
public: | public: | ||||
/** | /** | ||||
* get HostCpuEngine instance. | * get HostCpuEngine instance. | ||||
@@ -87,25 +101,25 @@ extern "C" { | |||||
* When Ge start, GE will invoke this interface | * When Ge start, GE will invoke this interface | ||||
* @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
*/ | */ | ||||
ge::Status Initialize(const map<string, string> &options); | |||||
GE_FUNC_VISIBILITY ge::Status Initialize(const map<string, string> &options); | |||||
/** | /** | ||||
* After the initialize, GE will invoke this interface to get the Ops kernel Store | * After the initialize, GE will invoke this interface to get the Ops kernel Store | ||||
* @param ops_kernel_map The host cpu's ops kernel info | * @param ops_kernel_map The host cpu's ops kernel info | ||||
*/ | */ | ||||
void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
GE_FUNC_VISIBILITY void GetOpsKernelInfoStores(std::map<std::string, OpsKernelInfoStorePtr> &ops_kernel_map); | |||||
/** | /** | ||||
* After the initialize, GE will invoke this interface to get the Graph Optimizer | * After the initialize, GE will invoke this interface to get the Graph Optimizer | ||||
* @param graph_optimizers The host cpu's Graph Optimizer objs | * @param graph_optimizers The host cpu's Graph Optimizer objs | ||||
*/ | */ | ||||
void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
GE_FUNC_VISIBILITY void GetGraphOptimizerObjs(std::map<std::string, GraphOptimizerPtr> &graph_optimizers); | |||||
/** | /** | ||||
* When the graph finished, GE will invoke this interface | * When the graph finished, GE will invoke this interface | ||||
* @return The status whether initialize successfully | * @return The status whether initialize successfully | ||||
*/ | */ | ||||
ge::Status Finalize(); | |||||
GE_FUNC_VISIBILITY ge::Status Finalize(); | |||||
} | } | ||||
#endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ | #endif // GE_HOST_CPU_ENGINE_ENGINE_HOST_CPU_ENGINE_H_ |
@@ -17,11 +17,25 @@ | |||||
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | ||||
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_BUILDER_H_ | ||||
// GE_FUNC_VISIBILITY: export marker for symbols of this library.
// When FUNC_VISIBILITY is defined it expands to a compiler-specific export attribute
// (dllexport under MSVC, default ELF visibility otherwise); when it is not defined the macro is empty.
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
// NOTE(review): MSVC documents the double-underscore spelling `__declspec`; the single-underscore
// form used here is a compatibility synonym that is unavailable under /Za -- confirm this is intended.
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
// Non-MSVC compilers: mark the symbol with default visibility.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include "common/opskernel/ops_kernel_builder.h" | #include "common/opskernel/ops_kernel_builder.h" | ||||
namespace ge { | namespace ge { | ||||
namespace host_cpu { | namespace host_cpu { | ||||
class HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||||
class GE_FUNC_VISIBILITY HostCpuOpsKernelBuilder : public OpsKernelBuilder { | |||||
public: | public: | ||||
Status Initialize(const map<std::string, std::string> &options) override; | Status Initialize(const map<std::string, std::string> &options) override; | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | #ifndef GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | ||||
#define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | #define GE_HOST_CPU_ENGINE_OPS_KERNEL_STORE_HOST_CPU_OPS_KERNEL_INFO_H_ | ||||
// GE_FUNC_VISIBILITY: export marker for symbols of this library.
// When FUNC_VISIBILITY is defined it expands to a compiler-specific export attribute
// (dllexport under MSVC, default ELF visibility otherwise); when it is not defined the macro is empty.
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
// NOTE(review): MSVC documents the double-underscore spelling `__declspec`; the single-underscore
// form used here is a compatibility synonym that is unavailable under /Za -- confirm this is intended.
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
// Non-MSVC compilers: mark the symbol with default visibility.
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
#include <vector> | #include <vector> | ||||
@@ -25,7 +39,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace host_cpu { | namespace host_cpu { | ||||
class HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
class GE_FUNC_VISIBILITY HostCpuOpsKernelInfoStore : public OpsKernelInfoStore { | |||||
public: | public: | ||||
HostCpuOpsKernelInfoStore() {} | HostCpuOpsKernelInfoStore() {} | ||||
~HostCpuOpsKernelInfoStore() override = default; | ~HostCpuOpsKernelInfoStore() override = default; | ||||
@@ -21,7 +21,7 @@ | |||||
namespace ge { | namespace ge { | ||||
namespace host_cpu { | namespace host_cpu { | ||||
class HostOp : public Op { | |||||
class GE_FUNC_VISIBILITY HostOp : public Op { | |||||
public: | public: | ||||
HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | HostOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | ||||
~HostOp() override = default; | ~HostOp() override = default; | ||||
@@ -29,7 +29,7 @@ namespace host_cpu { | |||||
/** | /** | ||||
* The base class for all op. | * The base class for all op. | ||||
*/ | */ | ||||
class Op { | |||||
class GE_FUNC_VISIBILITY Op { | |||||
public: | public: | ||||
Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | Op(const Node &node, RunContext &run_context) : run_context_(run_context), node_(node) {} | ||||
virtual ~Op() = default; | virtual ~Op() = default; | ||||
@@ -32,7 +32,7 @@ using OP_CREATOR_FUNC = std::function<std::shared_ptr<Op>(const Node &, RunConte | |||||
/** | /** | ||||
* manage all the op, support create op. | * manage all the op, support create op. | ||||
*/ | */ | ||||
class OpFactory { | |||||
class GE_FUNC_VISIBILITY OpFactory { | |||||
public: | public: | ||||
static OpFactory &Instance(); | static OpFactory &Instance(); | ||||
@@ -70,7 +70,7 @@ class OpFactory { | |||||
std::vector<std::string> all_ops_; | std::vector<std::string> all_ops_; | ||||
}; | }; | ||||
class OpRegistrar { | |||||
class GE_FUNC_VISIBILITY OpRegistrar { | |||||
public: | public: | ||||
OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | OpRegistrar(const std::string &type, const OP_CREATOR_FUNC &func) { | ||||
OpFactory::Instance().RegisterCreator(type, func); | OpFactory::Instance().RegisterCreator(type, func); | ||||
@@ -71,7 +71,7 @@ TensorValue::TensorValue(void *buffer, size_t size) : ref_buffer_(buffer), ref_s | |||||
TensorValue::~TensorValue() { Destroy(); } | TensorValue::~TensorValue() { Destroy(); } | ||||
void TensorValue::Destroy() { | void TensorValue::Destroy() { | ||||
if (buffer_ != nullptr || ref_buffer_ != nullptr) { | |||||
if (buffer_ != nullptr) { | |||||
GELOGD("Unref tensor: %s", DebugString().c_str()); | GELOGD("Unref tensor: %s", DebugString().c_str()); | ||||
buffer_.reset(); | buffer_.reset(); | ||||
} | } | ||||
@@ -71,12 +71,14 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, | |||||
GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | GE_CHK_STATUS_RET_NOLOG(ResetExecutionContext(context_)); | ||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[InitContext] End"); | ||||
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc), "Failed to execute partitioned call."); | |||||
HYBRID_CHK_STATUS_RET(executor.ExecuteAsync(args.inputs, args.input_desc, args.outputs), | |||||
"Failed to execute partitioned call."); | |||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[ExecuteAsync] End"); | ||||
HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); | ||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); | ||||
args.outputs.clear(); | |||||
HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); | ||||
RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | RECORD_MODEL_EXECUTION_EVENT(&context_, "[GetOutput] End"); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -131,10 +131,14 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue> | |||||
} | } | ||||
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | ||||
const std::vector<ConstGeTensorDescPtr> &input_desc) { | |||||
const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
const std::vector<TensorValue> &outputs) { | |||||
GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false"); | ||||
GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str()); | ||||
if (!outputs.empty()) { | |||||
GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs), | |||||
"Failed to enable output zero copy by user provided outputs."); | |||||
} | |||||
if (!graph_item_->IsDynamic()) { | if (!graph_item_->IsDynamic()) { | ||||
return ExecuteAsyncForKnownShape(inputs); | return ExecuteAsyncForKnownShape(inputs); | ||||
} | } | ||||
@@ -144,6 +148,11 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/**
 * Convenience overload: execute the subgraph asynchronously without any
 * caller-provided output tensors.
 * @param inputs input tensors
 * @param input_desc input tensor descriptions
 * @return SUCCESS on success, error code otherwise
 */
Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc) {
  // An empty output list makes the three-argument overload skip output zero copy.
  const std::vector<TensorValue> no_outputs;
  return ExecuteAsync(inputs, input_desc, no_outputs);
}
Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | Status SubgraphExecutor::ExecuteAsyncForKnownShape(const std::vector<TensorValue> &inputs) { | ||||
GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | GELOGD("[%s] subgraph is not dynamic.", graph_item_->GetName().c_str()); | ||||
if (graph_item_->GetAllNodes().size() != 1) { | if (graph_item_->GetAllNodes().size() != 1) { | ||||
@@ -440,5 +449,37 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/**
 * Bind caller-provided output tensors to the nodes that feed the outputs of this subgraph.
 * The i-th tensor is set as output `output_edges[i].second` of node `output_edges[i].first`.
 * @param outputs user provided output tensors, one per output edge of the graph item
 * @return SUCCESS on success, PARAM_INVALID if the number of tensors does not match the
 *         number of output edges, or the error returned by SetOutput otherwise
 */
Status SubgraphExecutor::EnableOutputZeroCopy(const std::vector<TensorValue> &outputs) {
  GELOGD("To enable zero copy, output number = %zu", outputs.size());
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> NetOutput, set the output tensor of Op that output to the NetOutput node
  if (outputs.size() != output_edges.size()) {
    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
           output_edges.size(),
           outputs.size());
    return PARAM_INVALID;
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    const auto &output_tensor = outputs[i];
    const auto &output_node = output_edges[i].first;
    const int output_idx = output_edges[i].second;
    GELOGD("[%s] Set output tensor[%zu] to [%s]'s output[%d], tensor = %s",
           graph_item_->GetName().c_str(),
           i,
           output_node->NodeName().c_str(),
           output_idx,
           output_tensor.DebugString().c_str());

    // The message names the output tensor: this call sets an output, not an input.
    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
                      "[%s] Failed to set output tensor[%zu]",
                      graph_item_->GetName().c_str(),
                      i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -43,7 +43,19 @@ class SubgraphExecutor { | |||||
* @param input_desc input tensor descriptions | * @param input_desc input tensor descriptions | ||||
* @return SUCCESS on success, error code otherwise | * @return SUCCESS on success, error code otherwise | ||||
*/ | */ | ||||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
const std::vector<ConstGeTensorDescPtr> &input_desc); | |||||
/** | |||||
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are | |||||
* valid after this method returned | |||||
* @param inputs input tensors | |||||
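* @param input_desc input tensor descriptions | |||||
* @param outputs user provided output tensors; when not empty they are set as the outputs of the subgraph (output zero copy) | |||||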
* @return SUCCESS on success, error code otherwise | |||||
*/ | |||||
Status ExecuteAsync(const std::vector<TensorValue> &inputs, | |||||
const std::vector<ConstGeTensorDescPtr> &input_desc, | |||||
const std::vector<TensorValue> &outputs); | |||||
/** | /** | ||||
* Execute subgraph async, output tensor address(not data) and output tensor descriptions are | * Execute subgraph async, output tensor address(not data) and output tensor descriptions are | ||||
@@ -76,6 +88,7 @@ class SubgraphExecutor { | |||||
private: | private: | ||||
Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | Status PrepareForExecution(GraphExecutionContext *ctx, NodeState &node_state); | ||||
Status EnableOutputZeroCopy(const std::vector<TensorValue> &outputs); | |||||
static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | static Status InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state); | ||||
Status Init(const std::vector<TensorValue> &inputs, | Status Init(const std::vector<TensorValue> &inputs, | ||||
const std::vector<ConstGeTensorDescPtr> &input_desc); | const std::vector<ConstGeTensorDescPtr> &input_desc); | ||||
@@ -40,9 +40,14 @@ HybridModel::~HybridModel() { | |||||
GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | GELOGD("[%s] HybridModel destroyed.", model_name_.c_str()); | ||||
} | } | ||||
Status HybridModel::Init() { | |||||
Status HybridModel::Init(bool is_single_op) { | |||||
GELOGD("Start to init hybrid model."); | GELOGD("Start to init hybrid model."); | ||||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
is_single_op_ = is_single_op; | |||||
if (is_single_op) { | |||||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).BuildForSingleOp(), "Failed to build hybrid model."); | |||||
} else { | |||||
GE_CHK_STATUS_RET(HybridModelBuilder(*this).Build(), "Failed to build hybrid model."); | |||||
} | |||||
GELOGD("HybridModel initialized successfully."); | GELOGD("HybridModel initialized successfully."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -37,7 +37,7 @@ class HybridModel { | |||||
~HybridModel(); | ~HybridModel(); | ||||
Status Init(); | |||||
Status Init(bool is_single_op = false); | |||||
const NodeItem *GetNodeItem(const NodePtr &node) const; | const NodeItem *GetNodeItem(const NodePtr &node) const; | ||||
@@ -69,6 +69,10 @@ class HybridModel { | |||||
return model_id_; | return model_id_; | ||||
} | } | ||||
bool IsSingleOp() const { | |||||
return is_single_op_; | |||||
} | |||||
TensorValue* GetVariable(const string &name) const; | TensorValue* GetVariable(const string &name) const; | ||||
NodePtr GetVariableNode(const string &name) const; | NodePtr GetVariableNode(const string &name) const; | ||||
@@ -131,11 +135,13 @@ class HybridModel { | |||||
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_; | ||||
bool is_new_model_desc_ = false; // support aipp | bool is_new_model_desc_ = false; // support aipp | ||||
bool is_single_op_ = false; | |||||
// runtime fields | // runtime fields | ||||
uint32_t device_id_ = 0; | uint32_t device_id_ = 0; | ||||
uint32_t model_id_ = 0; | uint32_t model_id_ = 0; | ||||
uint8_t *var_mem_base_ = nullptr; | uint8_t *var_mem_base_ = nullptr; | ||||
std::unique_ptr<TensorBuffer> weight_buffer_; | |||||
RuntimeParam root_runtime_param_; | RuntimeParam root_runtime_param_; | ||||
}; | }; | ||||
} // namespace hybrid | } // namespace hybrid | ||||
@@ -147,6 +147,21 @@ Status HybridModelBuilder::Build() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/**
 * Build the hybrid model for the single-op case.
 * Validates the root model, indexes the task defs of the root graph's GeModel, then loads
 * the graph, initializes the weights and loads the tasks.
 * @return SUCCESS on success, INTERNAL_ERROR if the root graph has no GeModel,
 *         or the error code of the first failing step otherwise
 */
Status HybridModelBuilder::BuildForSingleOp() {
  GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
  const auto &root_graph = ge_root_model_->GetRootGraph();
  hybrid_model_.model_name_ = root_graph->GetName();
  GELOGI("[%s] Start to build hybrid model.", GetGraphName());

  // Look the model up with find(): operator[] on a copy of the map would silently
  // produce a null GeModelPtr for a missing key, which is dereferenced when indexing tasks.
  const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel();
  const auto iter = subgraph_models.find(root_graph->GetName());
  if (iter == subgraph_models.end() || iter->second == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] Failed to find GeModel of root graph", GetGraphName());
    return INTERNAL_ERROR;
  }
  const GeModelPtr &ge_model = iter->second;

  GE_CHK_STATUS_RET(IndexTaskDefs(root_graph, ge_model),
                    "[%s] Failed to index task defs", GetGraphName());
  GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
  GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
  GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
  GELOGI("[%s] Done building hybrid model for single op successfully.", GetGraphName());
  return SUCCESS;
}
Status HybridModelBuilder::ValidateParams() { | Status HybridModelBuilder::ValidateParams() { | ||||
GE_CHECK_NOTNULL(ge_root_model_); | GE_CHECK_NOTNULL(ge_root_model_); | ||||
GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model_->GetRootGraph()); | ||||
@@ -951,46 +966,71 @@ Status HybridModelBuilder::InitVariableTensors() { | |||||
} | } | ||||
Status HybridModelBuilder::InitWeights() { | Status HybridModelBuilder::InitWeights() { | ||||
// For constant in root graph | |||||
const auto &root_graph = ge_root_model_->GetRootGraph(); | |||||
const auto &subgraph_models = ge_root_model_->GetSubgraphInstanceNameToModel(); | |||||
auto iter = subgraph_models.find(root_graph->GetName()); | |||||
if (iter == subgraph_models.end()) { | |||||
GELOGD("Root graph model not found"); | |||||
return SUCCESS; | |||||
} | |||||
auto &root_model = iter->second; | |||||
const auto &weight_buffer = root_model->GetWeight(); | |||||
if (weight_buffer.GetSize() == 0) { | |||||
GELOGD("weight is empty"); | |||||
return SUCCESS; | |||||
} | |||||
auto allocator = NpuMemoryAllocator::GetAllocator(); | auto allocator = NpuMemoryAllocator::GetAllocator(); | ||||
GE_CHECK_NOTNULL(allocator); | GE_CHECK_NOTNULL(allocator); | ||||
for (auto &it : hybrid_model_.node_items_) { | |||||
auto &node_item = it.second; | |||||
if (node_item->node_type != CONSTANT) { | |||||
hybrid_model_.weight_buffer_ = TensorBuffer::Create(allocator, weight_buffer.size()); | |||||
GE_CHECK_NOTNULL(hybrid_model_.weight_buffer_); | |||||
auto weight_base = reinterpret_cast<uint8_t *>(hybrid_model_.weight_buffer_->GetData()); | |||||
GE_CHK_RT_RET(rtMemcpy(weight_base, | |||||
hybrid_model_.weight_buffer_->GetSize(), | |||||
weight_buffer.GetData(), | |||||
weight_buffer.GetSize(), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | |||||
GELOGI("Init weight mem successfully, weight base %p, weight size = %zu", | |||||
weight_base, | |||||
hybrid_model_.weight_buffer_->GetSize()); | |||||
for (auto &node : root_graph->GetDirectNode()) { | |||||
if (node->GetType() != CONSTANT) { | |||||
continue; | continue; | ||||
} | } | ||||
const auto &constant_node = node_item->node; | |||||
auto op_desc = constant_node->GetOpDesc(); | |||||
auto op_desc = node->GetOpDesc(); | |||||
auto v_weights = ModelUtils::GetWeights(op_desc); | auto v_weights = ModelUtils::GetWeights(op_desc); | ||||
if (v_weights.empty()) { | if (v_weights.empty()) { | ||||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", constant_node->GetName().c_str()); | |||||
GELOGE(INTERNAL_ERROR, "[%s] Constant has no value", node->GetName().c_str()); | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | auto *ge_tensor = const_cast<GeTensor *>(v_weights[0].get()); | ||||
auto output_desc = op_desc->MutableOutputDesc(0); | |||||
GE_CHECK_NOTNULL(output_desc); | |||||
auto tensor_size = ge_tensor->GetData().GetSize(); | |||||
GELOGD("[%s] Start to init Constant node [%s], size = %ld", | |||||
GE_CHECK_NOTNULL(ge_tensor); | |||||
const GeTensorDesc &tensor_desc = ge_tensor->GetTensorDesc(); | |||||
int64_t tensor_size = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetSize(*op_desc->MutableOutputDesc(0), tensor_size), | |||||
"[%s] Failed to get tensor size", | |||||
node->GetName().c_str()); | |||||
int64_t data_offset = 0; | |||||
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetDataOffset(tensor_desc, data_offset), | |||||
"[%s] Failed to get data offset", | |||||
node->GetName().c_str()); | |||||
GELOGD("[%s] Start to init Constant node [%s], size = %ld, offset = %ld", | |||||
GetGraphName(), | GetGraphName(), | ||||
constant_node->GetName().c_str(), | |||||
tensor_size); | |||||
node->GetName().c_str(), | |||||
tensor_size, | |||||
data_offset); | |||||
auto tensor_buffer = TensorBuffer::Create(allocator, tensor_size); | |||||
auto tensor_buffer = TensorBuffer::Create(weight_base + data_offset, tensor_size); | |||||
GE_CHECK_NOTNULL(tensor_buffer); | GE_CHECK_NOTNULL(tensor_buffer); | ||||
std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | std::unique_ptr<TensorValue> constant_tensor(new (std::nothrow)TensorValue(std::move(tensor_buffer))); | ||||
GE_CHECK_NOTNULL(constant_tensor); | GE_CHECK_NOTNULL(constant_tensor); | ||||
constant_tensor->SetName("Constant_" + op_desc->GetName()); | constant_tensor->SetName("Constant_" + op_desc->GetName()); | ||||
if (tensor_size > 0) { | |||||
GE_CHK_RT_RET(rtMemcpy(constant_tensor->MutableData(), | |||||
constant_tensor->GetSize(), | |||||
ge_tensor->GetData().data(), | |||||
ge_tensor->GetData().size(), | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | |||||
} | |||||
hybrid_model_.constant_tensors_.emplace(constant_node, std::move(constant_tensor)); | |||||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), constant_node->GetName().c_str(), tensor_size); | |||||
hybrid_model_.constant_tensors_.emplace(node, std::move(constant_tensor)); | |||||
GELOGD("[%s] Constant node [%s] added, size = %ld", GetGraphName(), node->GetName().c_str(), tensor_size); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1038,6 +1078,53 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/**
 * Index the task defs of ge_model by the node of sub_graph they belong to.
 * Direct nodes of sub_graph are keyed by their op id; every KERNEL, KERNEL_EX and HCCL
 * task is appended to hybrid_model_.task_defs_ for the node whose id equals the task's
 * op_index. Tasks of any other type are skipped.
 * @param sub_graph graph whose direct nodes are matched against the tasks
 * @param ge_model model that provides the task defs and the TBE kernel store
 * @return SUCCESS on success, INTERNAL_ERROR if a task refers to an unknown op index,
 *         or the error returned by GE_CHECK_NOTNULL for a null argument, node or task def
 */
Status HybridModelBuilder::IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model) {
  GE_CHECK_NOTNULL(sub_graph);
  GE_CHECK_NOTNULL(ge_model);
  const auto model_task_def = ge_model->GetModelTaskDefPtr();
  GE_CHECK_NOTNULL(model_task_def);

  // index task defs
  GELOGD("To index tasks for subgraph: %s", sub_graph->GetName().c_str());
  std::unordered_map<int64_t, NodePtr> node_map;
  for (const auto &node : sub_graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(node);
    GE_CHECK_NOTNULL(node->GetOpDesc());
    auto node_id = node->GetOpDesc()->GetId();
    GELOGD("op_index = %ld, node_name = %s", node_id, node->GetName().c_str());
    node_map.emplace(node_id, node);
  }

  // Bind by reference: copying the repeated field would deep-copy every task def.
  const auto &tasks = model_task_def->task();
  for (int i = 0; i < tasks.size(); ++i) {
    const domi::TaskDef &task_def = tasks[i];
    GELOGI("Task id = %d, task type = %d", i, task_def.type());
    const auto task_type = static_cast<rtModelTaskType_t>(task_def.type());

    // Always assigned below before use; unsupported task types are skipped.
    uint32_t op_index = 0U;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      op_index = task_def.kernel().context().op_index();
    } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
      op_index = task_def.kernel_ex().op_index();
    } else if (task_type == RT_MODEL_TASK_HCCL) {
      op_index = task_def.kernel_hccl().op_index();
    } else {
      GELOGD("Skip task type: %d", static_cast<int>(task_type));
      continue;
    }

    auto iter = node_map.find(op_index);
    if (iter == node_map.end()) {
      GELOGE(INTERNAL_ERROR, "Failed to get node by index = %u", op_index);
      return INTERNAL_ERROR;
    }

    auto &node = iter->second;
    if (task_type == RT_MODEL_TASK_KERNEL) {
      ge_model->GetTBEKernelStore().LoadTBEKernelBinToOpDesc(node->GetOpDesc());
    }

    GELOGD("Task loaded for node: %s, task type = %d, op_index = %u",
           node->GetName().c_str(), static_cast<int>(task_type), op_index);
    hybrid_model_.task_defs_[node].emplace_back(task_def);
  }

  return SUCCESS;
}
Status HybridModelBuilder::IndexTaskDefs() { | Status HybridModelBuilder::IndexTaskDefs() { | ||||
const auto &root_graph = ge_root_model_->GetRootGraph(); | const auto &root_graph = ge_root_model_->GetRootGraph(); | ||||
if (SetOutputNameAttr(*root_graph) != SUCCESS) { | if (SetOutputNameAttr(*root_graph) != SUCCESS) { | ||||
@@ -35,6 +35,7 @@ class HybridModelBuilder { | |||||
explicit HybridModelBuilder(HybridModel &hybrid_model); | explicit HybridModelBuilder(HybridModel &hybrid_model); | ||||
~HybridModelBuilder() = default; | ~HybridModelBuilder() = default; | ||||
Status Build(); | Status Build(); | ||||
Status BuildForSingleOp(); | |||||
private: | private: | ||||
static Status UpdateAnchorStatus(const NodePtr &node); | static Status UpdateAnchorStatus(const NodePtr &node); | ||||
@@ -64,6 +65,7 @@ class HybridModelBuilder { | |||||
Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies); | ||||
Status ParseDependentForFusedSubgraph(NodeItem &node_item); | Status ParseDependentForFusedSubgraph(NodeItem &node_item); | ||||
Status IndexTaskDefs(); | Status IndexTaskDefs(); | ||||
Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model); | |||||
Status IndexSpecialNodes(); | Status IndexSpecialNodes(); | ||||
Status InitRuntimeParams(); | Status InitRuntimeParams(); | ||||
Status InitModelMem(); | Status InitModelMem(); | ||||
@@ -49,6 +49,7 @@ Status AiCoreNodeExecutor::Initialize() { | |||||
Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node, shared_ptr<NodeTask> &task) const { | ||||
GE_CHECK_NOTNULL(node); | GE_CHECK_NOTNULL(node); | ||||
GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask Start.", node->GetName().c_str()); | ||||
bool is_single_op = model.IsSingleOp(); | |||||
auto *task_defs = model.GetTaskDefs(node); | auto *task_defs = model.GetTaskDefs(node); | ||||
if (task_defs == nullptr || task_defs->empty()) { | if (task_defs == nullptr || task_defs->empty()) { | ||||
@@ -66,7 +67,8 @@ Status AiCoreNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &nod | |||||
AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | AiCoreTaskBuilder builder(node->GetOpDesc(), *task_defs); | ||||
std::unique_ptr<NodeTask> node_task; | std::unique_ptr<NodeTask> node_task; | ||||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true), "[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
GE_CHK_STATUS_RET(builder.BuildTask(node_task, true, is_single_op), | |||||
"[%s] Failed to build op tasks.", node->GetName().c_str()); | |||||
task = std::move(node_task); | task = std::move(node_task); | ||||
GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | GELOGI("AiCoreNodeExecutor(%s) LoadTask End.", node->GetName().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -65,7 +65,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) { | |||||
} | } | ||||
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | TBEHandleStore &kernel_store = TBEHandleStore::GetInstance(); | ||||
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str()); | ||||
if (rt_ret != RT_ERROR_NONE) { | |||||
if (rt_ret != RT_ERROR_NONE || is_single_op_) { | |||||
void *bin_handle = nullptr; | void *bin_handle = nullptr; | ||||
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) { | ||||
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str()); | ||||
@@ -50,6 +50,8 @@ class AiCoreOpTask { | |||||
uint32_t GetBlockDim() const {return block_dim_;} | uint32_t GetBlockDim() const {return block_dim_;} | ||||
// Records whether this task is built for a single-op model; the flag is read in RegisterTbeHandle.
void SetSingleOp(bool is_single_op) { is_single_op_ = is_single_op; }
protected: | protected: | ||||
Status UpdateTilingInfo(TaskContext &context); | Status UpdateTilingInfo(TaskContext &context); | ||||
virtual std::string GetKeyForOpParamSize() const; | virtual std::string GetKeyForOpParamSize() const; | ||||
@@ -72,6 +74,7 @@ class AiCoreOpTask { | |||||
uint32_t args_size_ = 0; | uint32_t args_size_ = 0; | ||||
uint32_t block_dim_ = 1; | uint32_t block_dim_ = 1; | ||||
bool clear_atomic_ = true; | bool clear_atomic_ = true; | ||||
bool is_single_op_ = false; | |||||
std::vector<int> output_indices_to_skip_; | std::vector<int> output_indices_to_skip_; | ||||
}; | }; | ||||
@@ -37,7 +37,9 @@ AiCoreTaskBuilder::AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector | |||||
: op_desc_(op_desc), task_defs_(task_defs) { | : op_desc_(op_desc), task_defs_(task_defs) { | ||||
} | } | ||||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic) { | |||||
Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, | |||||
bool ignore_failure_on_atomic, | |||||
bool is_single_op) { | |||||
GE_CHECK_NOTNULL(op_desc_); | GE_CHECK_NOTNULL(op_desc_); | ||||
if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | if (task_defs_.size() > kNumTaskWithAtomicAddrCleanTask) { | ||||
GELOGE(INTERNAL_ERROR, | GELOGE(INTERNAL_ERROR, | ||||
@@ -68,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
auto atomic_task = | auto atomic_task = | ||||
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask()); | ||||
GE_CHECK_NOTNULL(atomic_task); | GE_CHECK_NOTNULL(atomic_task); | ||||
atomic_task->SetSingleOp(is_single_op); | |||||
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()), | ||||
"[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
@@ -77,6 +80,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<NodeTask> &node_task, bool i | |||||
// build aicore task | // build aicore task | ||||
auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | auto aicore_task = std::unique_ptr<AiCoreOpTask>(new(std::nothrow)AiCoreOpTask()); | ||||
GE_CHECK_NOTNULL(aicore_task); | GE_CHECK_NOTNULL(aicore_task); | ||||
aicore_task->SetSingleOp(is_single_op); | |||||
GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | GE_CHK_STATUS_RET(aicore_task->Init(*op_desc_, task_defs_.back()), | ||||
"[%s] Failed to init task for AtomicAddrClean", | "[%s] Failed to init task for AtomicAddrClean", | ||||
op_desc_->GetName().c_str()); | op_desc_->GetName().c_str()); | ||||
@@ -47,7 +47,7 @@ class AiCoreTaskBuilder { | |||||
AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | AiCoreTaskBuilder(const OpDescPtr &op_desc, const std::vector<domi::TaskDef> &task_defs); | ||||
~AiCoreTaskBuilder() = default; | ~AiCoreTaskBuilder() = default; | ||||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic); | |||||
Status BuildTask(std::unique_ptr<NodeTask> &node_task, bool ignore_failure_on_atomic, bool is_single_op = false); | |||||
private: | private: | ||||
bool ExpectAtomicAddrCleanTask(); | bool ExpectAtomicAddrCleanTask(); | ||||
@@ -27,7 +27,7 @@ namespace ge { | |||||
namespace hybrid { | namespace hybrid { | ||||
REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); | REGISTER_NODE_EXECUTOR_BUILDER(NodeExecutorManager::ExecutorType::GE_LOCAL, GeLocalNodeExecutor); | ||||
const std::unordered_map<std::string, std::vector<uint32_t>> | |||||
const std::map<std::string, std::vector<uint32_t>> | |||||
RefInputTask::out_ref_input_index_ = {{DATA, {}}, | RefInputTask::out_ref_input_index_ = {{DATA, {}}, | ||||
{AIPPDATA, {}}, | {AIPPDATA, {}}, | ||||
{RESHAPE, {}}, | {RESHAPE, {}}, | ||||
@@ -36,7 +36,7 @@ const std::unordered_map<std::string, std::vector<uint32_t>> | |||||
{BROADCASTGRADIENTARGS, {}} | {BROADCASTGRADIENTARGS, {}} | ||||
}; | }; | ||||
const std::unordered_set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||||
const std::set<std::string> DependInputShapeTask::depend_input_shape_ops_ = {SHAPE, SHAPEN, RANK, SIZE}; | |||||
Status RefInputTask::UpdateArgs(TaskContext &) { | Status RefInputTask::UpdateArgs(TaskContext &) { | ||||
// no need update args | // no need update args | ||||
@@ -46,7 +46,7 @@ class RefInputTask : public NodeTask { | |||||
// key is op type, value is output ref input index, | // key is op type, value is output ref input index, | ||||
// e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one | // e.g. {1,0} means out[0] ref input[1], out[1] ref input[0], if vector is empty, it means ref input one by one | ||||
static const std::unordered_map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||||
static const std::map<std::string, std::vector<uint32_t>> out_ref_input_index_; | |||||
}; | }; | ||||
class DependInputShapeTask : public NodeTask { | class DependInputShapeTask : public NodeTask { | ||||
@@ -65,7 +65,7 @@ class DependInputShapeTask : public NodeTask { | |||||
const NodePtr node_; | const NodePtr node_; | ||||
// ops depend input shape | // ops depend input shape | ||||
static const std::unordered_set<std::string> depend_input_shape_ops_; | |||||
static const std::set<std::string> depend_input_shape_ops_; | |||||
}; | }; | ||||
class ConstantNodeTask : public NodeTask { | class ConstantNodeTask : public NodeTask { | ||||
@@ -31,7 +31,7 @@ using std::map; | |||||
using std::vector; | using std::vector; | ||||
namespace ge { | namespace ge { | ||||
class GELib { | |||||
class GE_FUNC_VISIBILITY GELib { | |||||
public: | public: | ||||
GELib() = default; | GELib() = default; | ||||
~GELib() = default; | ~GELib() = default; | ||||
@@ -77,7 +77,7 @@ Status CheckInputFormat(const string &input_format) { | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
std::string &dynamic_batch_size) { | std::string &dynamic_batch_size) { | ||||
int32_t size = 0; | int32_t size = 0; | ||||
for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { | for (auto iter = shape_map.begin(); iter != shape_map.end(); ++iter) { | ||||
@@ -119,7 +119,7 @@ bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||||
return true; | return true; | ||||
} | } | ||||
bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
const std::string input_format, std::string &dynamic_image_size) { | const std::string input_format, std::string &dynamic_image_size) { | ||||
if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { | if (!input_format.empty() && !ge::TypeUtils::IsFormatValid(input_format.c_str())) { | ||||
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); | GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str()); | ||||
@@ -177,7 +177,7 @@ bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> | |||||
return true; | return true; | ||||
} | } | ||||
bool CheckDynamicDimsInputShapeValid(const unordered_map<string, vector<int64_t>> &shape_map, | |||||
bool CheckDynamicDimsInputShapeValid(const map<string, vector<int64_t>> &shape_map, | |||||
string input_format, string &dynamic_dims) { | string input_format, string &dynamic_dims) { | ||||
if (input_format != "ND") { | if (input_format != "ND") { | ||||
ErrorManager::GetInstance().ATCReportErrMessage( | ErrorManager::GetInstance().ATCReportErrMessage( | ||||
@@ -272,7 +272,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
unordered_map<string, vector<int64_t>> shape_map; | |||||
map<string, vector<int64_t>> shape_map; | |||||
vector<pair<string, vector<int64_t>>> user_shape_map; | vector<pair<string, vector<int64_t>>> user_shape_map; | ||||
is_dynamic_input = true; | is_dynamic_input = true; | ||||
if (input_shape.empty()) { | if (input_shape.empty()) { | ||||
@@ -310,7 +310,7 @@ Status CheckDynamicInputParamValid(string &dynamic_batch_size, string &dynamic_i | |||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
} | } | ||||
bool ParseInputShape(const string &input_shape, unordered_map<string, vector<int64_t>> &shape_map, | |||||
bool ParseInputShape(const string &input_shape, map<string, vector<int64_t>> &shape_map, | |||||
vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | vector<pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input) { | ||||
vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | vector<string> shape_vec = StringUtils::Split(input_shape, ';'); | ||||
const int DEFAULT_SHAPE_PAIR_SIZE = 2; | const int DEFAULT_SHAPE_PAIR_SIZE = 2; | ||||
@@ -46,13 +46,13 @@ static std::map<std::string, domiTensorFormat_t> input_format_str_to_geformat = | |||||
static const std::string kEnableCompressWeightTrue = "1"; | static const std::string kEnableCompressWeightTrue = "1"; | ||||
static const std::string kEnableCompressWeightFalse = "0"; | static const std::string kEnableCompressWeightFalse = "0"; | ||||
bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
bool CheckDynamicBatchSizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
std::string &dynamic_batch_size); | std::string &dynamic_batch_size); | ||||
bool CheckDynamicImagesizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | |||||
bool CheckDynamicImagesizeInputShapeValid(map<string, vector<int64_t>> shape_map, | |||||
const std::string input_format, std::string &dynamic_image_size); | const std::string input_format, std::string &dynamic_image_size); | ||||
bool CheckDynamicDimsInputShapeValid(const std::unordered_map<std::string, std::vector<int64_t>> &shape_map, | |||||
bool CheckDynamicDimsInputShapeValid(const std::map<std::string, std::vector<int64_t>> &shape_map, | |||||
std::string input_format, std::string &dynamic_dims); | std::string input_format, std::string &dynamic_dims); | ||||
bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); | bool CheckAndParseDynamicDims(int32_t dynamic_dim_num, std::string &dynamic_dims); | ||||
@@ -61,7 +61,7 @@ Status CheckDynamicInputParamValid(std::string &dynamic_batch_size, std::string | |||||
std::string &dynamic_dims, const std::string input_shape, | std::string &dynamic_dims, const std::string input_shape, | ||||
const std::string input_format, bool &is_dynamic_input); | const std::string input_format, bool &is_dynamic_input); | ||||
bool ParseInputShape(const std::string &input_shape, std::unordered_map<string, std::vector<int64_t>> &shape_map, | |||||
bool ParseInputShape(const std::string &input_shape, std::map<string, std::vector<int64_t>> &shape_map, | |||||
std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | std::vector<std::pair<string, vector<int64_t>>> &user_shape_map, bool is_dynamic_input = false); | ||||
Status CheckOutputTypeParamValid(const std::string output_type); | Status CheckOutputTypeParamValid(const std::string output_type); | ||||
@@ -268,7 +268,7 @@ graphStatus Impl::UpdateDataOpAttr(const Graph &graph) { | |||||
if (options_.find(kInputShape) == options_.end()) { | if (options_.find(kInputShape) == options_.end()) { | ||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
unordered_map<string, vector<int64_t>> shape_map; | |||||
map<string, vector<int64_t>> shape_map; | |||||
vector<pair<string, vector<int64_t>>> user_shape_map; | vector<pair<string, vector<int64_t>>> user_shape_map; | ||||
GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | GE_CHK_BOOL_EXEC(ParseInputShape(options_[kInputShape], shape_map, user_shape_map, true), | ||||
return GRAPH_PARAM_INVALID, "parse input shape failed!"); | return GRAPH_PARAM_INVALID, "parse input shape failed!"); | ||||
@@ -23,6 +23,7 @@ target_compile_options(atc_atc.bin PRIVATE | |||||
-O2 | -O2 | ||||
-Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(atc_atc.bin PRIVATE | target_compile_definitions(atc_atc.bin PRIVATE | ||||
@@ -30,6 +31,7 @@ target_compile_definitions(atc_atc.bin PRIVATE | |||||
COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
google=ascend_private | google=ascend_private | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(atc_atc.bin PRIVATE | target_include_directories(atc_atc.bin PRIVATE | ||||
@@ -58,6 +60,10 @@ target_include_directories(atc_atc.bin PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(atc_atc.bin PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(atc_atc.bin PRIVATE | target_link_libraries(atc_atc.bin PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
ascend_protobuf | ascend_protobuf | ||||
@@ -90,6 +96,7 @@ target_compile_options(fwk_atc.bin PRIVATE | |||||
-O2 | -O2 | ||||
-Wno-deprecated-declarations | -Wno-deprecated-declarations | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(fwk_atc.bin PRIVATE | target_compile_definitions(fwk_atc.bin PRIVATE | ||||
@@ -97,6 +104,7 @@ target_compile_definitions(fwk_atc.bin PRIVATE | |||||
COMPILE_OMG_PACKAGE | COMPILE_OMG_PACKAGE | ||||
google=ascend_private | google=ascend_private | ||||
LOG_CPP | LOG_CPP | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(fwk_atc.bin PRIVATE | target_include_directories(fwk_atc.bin PRIVATE | ||||
@@ -125,6 +133,10 @@ target_include_directories(fwk_atc.bin PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_options(fwk_atc.bin PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(fwk_atc.bin PRIVATE | target_link_libraries(fwk_atc.bin PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
ascend_protobuf | ascend_protobuf | ||||
@@ -23,7 +23,7 @@ | |||||
namespace ge { | namespace ge { | ||||
using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>; | using OpsKernelBuilderPtr = std::shared_ptr<OpsKernelBuilder>; | ||||
class OpsKernelBuilderManager { | |||||
class GE_FUNC_VISIBILITY OpsKernelBuilderManager { | |||||
public: | public: | ||||
~OpsKernelBuilderManager(); | ~OpsKernelBuilderManager(); | ||||
@@ -41,7 +41,7 @@ using std::vector; | |||||
namespace ge { | namespace ge { | ||||
using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>; | using OpsKernelInfoStorePtr = std::shared_ptr<OpsKernelInfoStore>; | ||||
class OpsKernelManager { | |||||
class GE_FUNC_VISIBILITY OpsKernelManager { | |||||
public: | public: | ||||
friend class GELib; | friend class GELib; | ||||
@@ -9,11 +9,13 @@ add_library(engine SHARED ${SRC_LIST}) | |||||
target_compile_options(engine PRIVATE | target_compile_options(engine PRIVATE | ||||
-Werror | -Werror | ||||
-fno-common | -fno-common | ||||
-fvisibility=hidden | |||||
) | ) | ||||
target_compile_definitions(engine PRIVATE | target_compile_definitions(engine PRIVATE | ||||
REUSE_MEMORY=1 | REUSE_MEMORY=1 | ||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
FUNC_VISIBILITY | |||||
) | ) | ||||
target_include_directories(engine PRIVATE | target_include_directories(engine PRIVATE | ||||
@@ -32,6 +34,10 @@ target_include_directories(engine PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc | ${GE_CODE_DIR}/third_party/fwkacllib/inc | ||||
) | ) | ||||
target_link_options(engine PRIVATE | |||||
-Wl,-Bsymbolic | |||||
) | |||||
target_link_libraries(engine PRIVATE | target_link_libraries(engine PRIVATE | ||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
@@ -25,7 +25,7 @@ | |||||
#include "plugin/engine/engine_manage.h" | #include "plugin/engine/engine_manage.h" | ||||
namespace ge { | namespace ge { | ||||
class AICoreDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY AICoreDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
AICoreDNNEngine() = default; | AICoreDNNEngine() = default; | ||||
explicit AICoreDNNEngine(const std::string &engine_name); | explicit AICoreDNNEngine(const std::string &engine_name); | ||||
@@ -40,7 +40,7 @@ class AICoreDNNEngine : public DNNEngine { | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class VectorCoreDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY VectorCoreDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
VectorCoreDNNEngine() = default; | VectorCoreDNNEngine() = default; | ||||
explicit VectorCoreDNNEngine(const std::string &engine_name); | explicit VectorCoreDNNEngine(const std::string &engine_name); | ||||
@@ -56,7 +56,7 @@ class VectorCoreDNNEngine : public DNNEngine { | |||||
}; | }; | ||||
class AICpuDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY AICpuDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
AICpuDNNEngine() = default; | AICpuDNNEngine() = default; | ||||
explicit AICpuDNNEngine(const std::string &engine_name); | explicit AICpuDNNEngine(const std::string &engine_name); | ||||
@@ -71,7 +71,7 @@ class AICpuDNNEngine : public DNNEngine { | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class AICpuTFDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY AICpuTFDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
AICpuTFDNNEngine() = default; | AICpuTFDNNEngine() = default; | ||||
explicit AICpuTFDNNEngine(const std::string &engine_name); | explicit AICpuTFDNNEngine(const std::string &engine_name); | ||||
@@ -86,7 +86,7 @@ class AICpuTFDNNEngine : public DNNEngine { | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class GeLocalDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY GeLocalDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
GeLocalDNNEngine() = default; | GeLocalDNNEngine() = default; | ||||
explicit GeLocalDNNEngine(const std::string &engine_name); | explicit GeLocalDNNEngine(const std::string &engine_name); | ||||
@@ -101,7 +101,7 @@ class GeLocalDNNEngine : public DNNEngine { | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class HostCpuDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY HostCpuDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
HostCpuDNNEngine() = default; | HostCpuDNNEngine() = default; | ||||
explicit HostCpuDNNEngine(const std::string &engine_name); | explicit HostCpuDNNEngine(const std::string &engine_name); | ||||
@@ -116,7 +116,7 @@ private: | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class RtsDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY RtsDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
RtsDNNEngine() = default; | RtsDNNEngine() = default; | ||||
explicit RtsDNNEngine(const std::string &engine_name); | explicit RtsDNNEngine(const std::string &engine_name); | ||||
@@ -131,7 +131,7 @@ class RtsDNNEngine : public DNNEngine { | |||||
DNNEngineAttribute engine_attribute_; | DNNEngineAttribute engine_attribute_; | ||||
}; | }; | ||||
class HcclDNNEngine : public DNNEngine { | |||||
class GE_FUNC_VISIBILITY HcclDNNEngine : public DNNEngine { | |||||
public: | public: | ||||
HcclDNNEngine() = default; | HcclDNNEngine() = default; | ||||
explicit HcclDNNEngine(const std::string &engine_name); | explicit HcclDNNEngine(const std::string &engine_name); | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #ifndef GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | ||||
#define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #define GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <memory> | #include <memory> | ||||
#include <string> | #include <string> | ||||
@@ -26,7 +40,7 @@ | |||||
namespace ge { | namespace ge { | ||||
using DNNEnginePtr = std::shared_ptr<DNNEngine>; | using DNNEnginePtr = std::shared_ptr<DNNEngine>; | ||||
class EngineManager { | |||||
class GE_FUNC_VISIBILITY EngineManager { | |||||
public: | public: | ||||
static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); | static Status RegisterEngine(const std::string &engine_name, DNNEnginePtr engine_ptr); | ||||
static DNNEnginePtr GetEngine(const std::string &engine_name); | static DNNEnginePtr GetEngine(const std::string &engine_name); | ||||
@@ -34,7 +48,7 @@ class EngineManager { | |||||
}; | }; | ||||
extern "C" { | extern "C" { | ||||
void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||||
GE_FUNC_VISIBILITY void GetDNNEngineObjs(std::map<std::string, DNNEnginePtr> &engines); | |||||
} | } | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ | #endif // GE_PLUGIN_ENGINE_ENGINE_MANAGE_H_ |
@@ -77,6 +77,23 @@ Status InnerSession::Initialize() { | |||||
UpdateThreadContext(std::map<std::string, std::string>{}); | UpdateThreadContext(std::map<std::string, std::string>{}); | ||||
// session device id set here | |||||
std::string str_session_device_id; | |||||
if (GetContext().GetOption("ge.session_device_id", str_session_device_id) == SUCCESS) { | |||||
GELOGI("Option session device id has set, value is %s.", str_session_device_id.c_str()); | |||||
uint32_t session_device_id = 0; | |||||
try { | |||||
session_device_id = static_cast<uint32_t>(std::stoi(str_session_device_id.c_str())); | |||||
// session device id has priority | |||||
GetContext().SetCtxDeviceId(session_device_id); | |||||
} catch (std::invalid_argument &) { | |||||
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||||
} catch (std::out_of_range &) { | |||||
GELOGW("session device id %s transform to int failed.", str_session_device_id.c_str()); | |||||
} | |||||
} | |||||
GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | GE_CHK_RT_RET(rtSetDevice(GetContext().DeviceId())); | ||||
DumpProperties dump_properties; | DumpProperties dump_properties; | ||||
@@ -606,7 +606,7 @@ Status InitDomiOmgContext(const string &input_shape, const string &input_format, | |||||
} | } | ||||
// Analyze the input shape paramete | // Analyze the input shape paramete | ||||
unordered_map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||||
map<string, vector<int64_t>> &shape_map = domi::GetContext().input_dims; | |||||
if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, | if (!ge::ParseInputShape(input_shape, domi::GetContext().input_dims, domi::GetContext().user_input_dims, | ||||
is_dynamic_input) || | is_dynamic_input) || | ||||
@@ -689,7 +689,7 @@ Status ParseOutNodes(const string &out_nodes) { | |||||
/// | /// | ||||
static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { | static Status CheckOpNameMap(const ComputeGraphPtr &graph, const std::string &op_conf) { | ||||
GE_CHECK_NOTNULL(graph); | GE_CHECK_NOTNULL(graph); | ||||
unordered_map<string, string> graphNodeTypes; | |||||
map<string, string> graphNodeTypes; | |||||
for (const NodePtr &node : graph->GetAllNodes()) { | for (const NodePtr &node : graph->GetAllNodes()) { | ||||
auto op_desc = node->GetOpDesc(); | auto op_desc = node->GetOpDesc(); | ||||
if (op_desc == nullptr) { | if (op_desc == nullptr) { | ||||
@@ -256,9 +256,27 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||||
const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
vector<DataBuffer> &output_buffers) { | vector<DataBuffer> &output_buffers) { | ||||
GE_CHECK_NOTNULL(op_task_); | |||||
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); | ||||
if (hybrid_model_executor_ != nullptr) { | |||||
GELOGD("Execute multi-task dynamic single op by hybrid model executor"); | |||||
hybrid::HybridModelExecutor::ExecuteArgs args; | |||||
for (auto &input : input_buffers) { | |||||
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); | |||||
} | |||||
for (auto &output : output_buffers) { | |||||
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); | |||||
} | |||||
for (auto &tensor_desc : input_desc) { | |||||
auto desc = MakeShared<GeTensorDesc>(tensor_desc); | |||||
GE_CHECK_NOTNULL(desc); | |||||
args.input_desc.emplace_back(desc); | |||||
} | |||||
return hybrid_model_executor_->Execute(args); | |||||
} | |||||
std::lock_guard<std::mutex> lk(*stream_mutex_); | std::lock_guard<std::mutex> lk(*stream_mutex_); | ||||
GE_CHECK_NOTNULL(op_task_); | |||||
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); | ||||
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); | ||||
@@ -28,6 +28,7 @@ | |||||
#include "runtime/stream.h" | #include "runtime/stream.h" | ||||
#include "task/op_task.h" | #include "task/op_task.h" | ||||
#include "cce/aicpu_engine_struct.h" | #include "cce/aicpu_engine_struct.h" | ||||
#include "hybrid/executor/hybrid_model_executor.h" | |||||
namespace ge { | namespace ge { | ||||
class StreamResource; | class StreamResource; | ||||
@@ -46,7 +47,7 @@ class SingleOp { | |||||
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
friend class SingleOpModel; | friend class SingleOpModel; | ||||
StreamResource *stream_resource_; | |||||
StreamResource *stream_resource_ = nullptr; | |||||
std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
std::vector<void *> input_addr_list_; | std::vector<void *> input_addr_list_; | ||||
@@ -77,6 +78,8 @@ class DynamicSingleOp { | |||||
std::vector<DataBuffer> &outputs) const; | std::vector<DataBuffer> &outputs) const; | ||||
std::unique_ptr<OpTask> op_task_; | std::unique_ptr<OpTask> op_task_; | ||||
std::unique_ptr<hybrid::HybridModel> hybrid_model_; | |||||
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_; | |||||
uintptr_t resource_id_ = 0; | uintptr_t resource_id_ = 0; | ||||
std::mutex *stream_mutex_; | std::mutex *stream_mutex_; | ||||
rtStream_t stream_ = nullptr; | rtStream_t stream_ = nullptr; | ||||
@@ -31,6 +31,8 @@ | |||||
#include "task/aicpu_task_builder.h" | #include "task/aicpu_task_builder.h" | ||||
#include "task/aicpu_kernel_task_builder.h" | #include "task/aicpu_kernel_task_builder.h" | ||||
#include "task/tbe_task_builder.h" | #include "task/tbe_task_builder.h" | ||||
#include "hybrid/executor/hybrid_model_executor.h" | |||||
#include "hybrid/node_executor/node_executor.h" | |||||
static std::atomic<std::uint64_t> aicpu_kernel_id(0); | static std::atomic<std::uint64_t> aicpu_kernel_id(0); | ||||
@@ -42,6 +44,20 @@ namespace ge { | |||||
namespace { | namespace { | ||||
const size_t kDataOutputNum = 1; | const size_t kDataOutputNum = 1; | ||||
} // namespace | } // namespace | ||||
static Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { | |||||
auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); | |||||
for (const auto &node : comp_graph->GetAllNodes()) { | |||||
auto op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
const auto &depends = op_desc->GetOpInferDepends(); | |||||
if (!depends.empty()) { | |||||
flag = true; | |||||
return SUCCESS; | |||||
} | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | SingleOpModel::SingleOpModel(const std::string &model_name, const void *model_data, uint32_t model_size) | ||||
: model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | : model_name_(model_name), ori_model_data_(model_data), ori_model_size_(model_size) {} | ||||
@@ -478,6 +494,30 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & | |||||
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); | ||||
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); | ||||
model_params_.memory_size = UINT_MAX; | model_params_.memory_size = UINT_MAX; | ||||
auto ge_model = model_helper_.GetGeModel(); | |||||
GE_CHECK_NOTNULL(ge_model); | |||||
bool infer_depend_flag = false; | |||||
GE_CHK_STATUS_RET_NOLOG(IfInferDepend(ge_model, infer_depend_flag)); | |||||
if (ge_model->GetModelTaskDefPtr()->task_size() > 1 || infer_depend_flag) { | |||||
GELOGD("Build single op HybridModel."); | |||||
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); | |||||
auto root_model = model_helper_.GetGeRootModel(); | |||||
GE_CHECK_NOTNULL(root_model); | |||||
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); | |||||
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); | |||||
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); | |||||
GE_CHECK_NOTNULL(single_op.hybrid_model_); | |||||
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "Failed to init hybrid model"); | |||||
int32_t device_id = 0; | |||||
GE_CHK_RT_RET(rtGetDevice(&device_id)); | |||||
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), | |||||
device_id, | |||||
resource.GetStream())); | |||||
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); | |||||
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "Failed to init hybrid model"); | |||||
return SUCCESS; | |||||
} | |||||
return BuildTaskListForDynamicOp(single_op); | return BuildTaskListForDynamicOp(single_op); | ||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -61,6 +61,10 @@ DynamicSingleOp *StreamResource::GetDynamicOperator(const void *key) { | |||||
return it->second.get(); | return it->second.get(); | ||||
} | } | ||||
rtStream_t StreamResource::GetStream() const { | |||||
return stream_; | |||||
} | |||||
void StreamResource::SetStream(rtStream_t stream) { | void StreamResource::SetStream(rtStream_t stream) { | ||||
stream_ = stream; | stream_ = stream; | ||||
} | } | ||||
@@ -37,6 +37,7 @@ class StreamResource { | |||||
StreamResource(StreamResource &&) = delete; | StreamResource(StreamResource &&) = delete; | ||||
StreamResource &operator=(const StreamResource &) = delete; | StreamResource &operator=(const StreamResource &) = delete; | ||||
StreamResource &operator=(StreamResource &&) = delete; | StreamResource &operator=(StreamResource &&) = delete; | ||||
rtStream_t GetStream() const; | |||||
void SetStream(rtStream_t stream); | void SetStream(rtStream_t stream); | ||||
SingleOp *GetOperator(const void *key); | SingleOp *GetOperator(const void *key); | ||||
@@ -16,7 +16,7 @@ logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(leve | |||||
""" | """ | ||||
this attr is used for symbol table visible | this attr is used for symbol table visible | ||||
""" | """ | ||||
GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY' | |||||
GE_ATTR = 'GE_FUNC_VISIBILITY' | |||||
""" | """ | ||||
generate stub func body by return type | generate stub func body by return type | ||||
@@ -34,15 +34,15 @@ typedef uint32_t (*pCallBackFunc)(uint32_t graph_id, const std::map<AscendString | |||||
} | } | ||||
// Initialize GE | // Initialize GE | ||||
ATTRIBUTED_DEPRECATED(Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||||
Status GEInitialize(const std::map<std::string, std::string> &options); | |||||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &)) | |||||
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<std::string, std::string> &options); | |||||
Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||||
GE_FUNC_VISIBILITY Status GEInitialize(const std::map<AscendString, AscendString> &options); | |||||
// Finalize GE, release all resources | // Finalize GE, release all resources | ||||
Status GEFinalize(); | |||||
GE_FUNC_VISIBILITY Status GEFinalize(); | |||||
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY Session { | |||||
class GE_FUNC_VISIBILITY Session { | |||||
public: | public: | ||||
ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ATTRIBUTED_DEPRECATED(Session(const std::map<AscendString, AscendString> &)) | ||||
explicit Session(const std::map<std::string, std::string> &options); | explicit Session(const std::map<std::string, std::string> &options); | ||||
@@ -28,7 +28,7 @@ namespace ge { | |||||
#define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | #define ATTRIBUTED_DEPRECATED(replacement) __declspec(deprecated("Please use " #replacement " instead.")) | ||||
#endif | #endif | ||||
class StatusFactory { | |||||
class GE_FUNC_VISIBILITY StatusFactory { | |||||
public: | public: | ||||
static StatusFactory *Instance() { | static StatusFactory *Instance() { | ||||
static StatusFactory instance; | static StatusFactory instance; | ||||
@@ -70,7 +70,7 @@ class StatusFactory { | |||||
std::map<uint32_t, std::string> err_desc_; | std::map<uint32_t, std::string> err_desc_; | ||||
}; | }; | ||||
class ErrorNoRegisterar { | |||||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||||
public: | public: | ||||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const char *desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #ifndef INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
#define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | #define INC_EXTERNAL_GE_GE_ERROR_CODES_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <stddef.h> | #include <stddef.h> | ||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | #ifndef INC_EXTERNAL_GE_IR_BUILD_H_ | ||||
#define INC_EXTERNAL_GE_IR_BUILD_H_ | #define INC_EXTERNAL_GE_IR_BUILD_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <string> | #include <string> | ||||
#include <map> | #include <map> | ||||
#include <memory> | #include <memory> | ||||
@@ -44,17 +58,17 @@ struct ModelBufferData { | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||||
graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &)) | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<std::string, std::string> global_options); | |||||
graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildInitialize(std::map<AscendString, AscendString> &global_options); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
* @brief build model.Notice the model is stored in buffer | * @brief build model.Notice the model is stored in buffer | ||||
* | * | ||||
*/ | */ | ||||
void aclgrphBuildFinalize(); | |||||
GE_FUNC_VISIBILITY void aclgrphBuildFinalize(); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -66,12 +80,12 @@ void aclgrphBuildFinalize(); | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &, const std::map<AscendString, AscendString> &, | |||||
ModelBufferData &)) | ModelBufferData &)) | ||||
graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<std::string, std::string> &build_options, | |||||
ModelBufferData &model); | ModelBufferData &model); | ||||
graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendString, AscendString> &build_options, | |||||
ModelBufferData &model); | ModelBufferData &model); | ||||
/** | /** | ||||
@@ -83,10 +97,10 @@ graphStatus aclgrphBuildModel(const ge::Graph &graph, const std::map<AscendStrin | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
ATTRIBUTED_DEPRECATED(graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||||
graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||||
ATTRIBUTED_DEPRECATED(GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *, const ModelBufferData &)) | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const string &output_file, const ModelBufferData &model); | |||||
graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &model); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -98,7 +112,7 @@ graphStatus aclgrphSaveModel(const char *output_file, const ModelBufferData &mod | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *patch_version); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -110,7 +124,7 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len); | |||||
/** | /** | ||||
* @ingroup AscendCL | * @ingroup AscendCL | ||||
@@ -123,7 +137,7 @@ graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const siz | |||||
* @retval GRAPH_SUCCESS The function is successfully executed. | * @retval GRAPH_SUCCESS The function is successfully executed. | ||||
* @retval OtherValues Failure | * @retval OtherValues Failure | ||||
*/ | */ | ||||
graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||||
GE_FUNC_VISIBILITY graphStatus aclgrphGenerateForOp(const AscendString &op_type, const std::vector<TensorDesc> &inputs, | |||||
const std::vector<TensorDesc> &outputs, Graph &graph); | const std::vector<TensorDesc> &outputs, Graph &graph); | ||||
}; // namespace ge | }; // namespace ge | ||||
@@ -37,7 +37,7 @@ extern "C" { | |||||
// trace status of log | // trace status of log | ||||
enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | enum TraceStatus { TRACE_INIT = 0, TRACE_RUNNING, TRACE_WAITING, TRACE_STOP }; | ||||
class GeLog { | |||||
class GE_FUNC_VISIBILITY GeLog { | |||||
public: | public: | ||||
static uint64_t GetTid() { | static uint64_t GetTid() { | ||||
#ifdef __GNUC__ | #ifdef __GNUC__ | ||||
@@ -278,7 +278,7 @@ | |||||
} while (0) | } while (0) | ||||
template <typename T> | template <typename T> | ||||
std::string FmtToStr(const T &t) { | |||||
GE_FUNC_VISIBILITY std::string FmtToStr(const T &t) { | |||||
std::string fmt; | std::string fmt; | ||||
std::stringstream st; | std::stringstream st; | ||||
st << "[" << t << "]"; | st << "[" << t << "]"; | ||||
@@ -17,6 +17,20 @@ | |||||
#ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | #ifndef INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | ||||
#define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | #define INC_FRAMEWORK_COMMON_FMK_ERROR_CODES_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
@@ -38,7 +52,7 @@ const int MODID_OME = 2; // OME module ID | |||||
const int MODID_CALIBRATION = 3; // Calibration module ID | const int MODID_CALIBRATION = 3; // Calibration module ID | ||||
namespace domi { | namespace domi { | ||||
class StatusFactory { | |||||
class GE_FUNC_VISIBILITY StatusFactory { | |||||
public: | public: | ||||
static StatusFactory *Instance(); | static StatusFactory *Instance(); | ||||
@@ -54,7 +68,7 @@ class StatusFactory { | |||||
std::map<uint32_t, std::string> err_desc_; | std::map<uint32_t, std::string> err_desc_; | ||||
}; | }; | ||||
class ErrorNoRegisterar { | |||||
class GE_FUNC_VISIBILITY ErrorNoRegisterar { | |||||
public: | public: | ||||
ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ErrorNoRegisterar(uint32_t err, const std::string &desc) { StatusFactory::Instance()->RegisterErrorNo(err, desc); } | ||||
~ErrorNoRegisterar() {} | ~ErrorNoRegisterar() {} | ||||
@@ -23,7 +23,7 @@ | |||||
#include "graph/tensor.h" | #include "graph/tensor.h" | ||||
namespace ge { | namespace ge { | ||||
class GeFormatUtil { | |||||
class GE_FUNC_VISIBILITY GeFormatUtil { | |||||
public: | public: | ||||
/// | /// | ||||
/// @name TransShape | /// @name TransShape | ||||
@@ -215,7 +215,7 @@ struct ModelInfo { | |||||
}; | }; | ||||
// Asynchronous callback interface, implemented by the caller | // Asynchronous callback interface, implemented by the caller | ||||
class ModelListener { | |||||
class GE_FUNC_VISIBILITY ModelListener { | |||||
public: | public: | ||||
virtual ~ModelListener() {} | virtual ~ModelListener() {} | ||||
/// | /// | ||||
@@ -17,11 +17,25 @@ | |||||
#ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | #ifndef INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | ||||
#define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | #define INC_FRAMEWORK_COMMON_GFLAGS_UTIL_H_ | ||||
#if defined(_MSC_VER) | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY _declspec(dllexport) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#else | |||||
#ifdef FUNC_VISIBILITY | |||||
#define GE_FUNC_VISIBILITY __attribute__((visibility("default"))) | |||||
#else | |||||
#define GE_FUNC_VISIBILITY | |||||
#endif | |||||
#endif | |||||
#include <gflags/gflags.h> | #include <gflags/gflags.h> | ||||
#include <string> | #include <string> | ||||
namespace ge { | namespace ge { | ||||
class GflagsUtils { | |||||
class GE_FUNC_VISIBILITY GflagsUtils { | |||||
public: | public: | ||||
static bool IsSetCommandTrue(const char *name) { | static bool IsSetCommandTrue(const char *name) { | ||||
std::string out; | std::string out; | ||||
@@ -28,7 +28,7 @@ | |||||
#include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
namespace ge { | namespace ge { | ||||
class ModelHelper { | |||||
class GE_FUNC_VISIBILITY ModelHelper { | |||||
public: | public: | ||||
ModelHelper() = default; | ModelHelper() = default; | ||||
~ModelHelper(); | ~ModelHelper(); | ||||