From 249a116bec2cbfac23c6537e80b663556f8a56c9 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 3 Mar 2021 13:55:07 +0800 Subject: [PATCH] fix(linux/whl): fix megengine whl build failed at manylinux2014 as > cuda11 cudnn8 trt7 have some dlopen libs, auditwheel can not handle correctly, also at latest auditwheel, NEW LIBS dirs change from package data dir to package dir(eg, megengine to MegEngine), which leads to copy manually libs change rpath hard with auditwheel rpath, so do not auditwheel now! GitOrigin-RevId: 3dfc8a526709563b16efe6b4e48645cf8fd8b9e4 --- scripts/whl/manylinux2014/build_wheel_common.sh | 8 +-- scripts/whl/manylinux2014/do_build_common.sh | 76 +++++++++++-------------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/scripts/whl/manylinux2014/build_wheel_common.sh b/scripts/whl/manylinux2014/build_wheel_common.sh index ab534474..3d08fb87 100755 --- a/scripts/whl/manylinux2014/build_wheel_common.sh +++ b/scripts/whl/manylinux2014/build_wheel_common.sh @@ -30,14 +30,14 @@ done echo "Build with ${SDK_NAME}" if [ $SDK_NAME == "cu101" ];then - COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1" + CUDA_COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1" EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF" BUILD_GCC8="ON" REQUIR_CUDA_VERSION="10010" REQUIR_CUDNN_VERSION="7.6.3" REQUIR_TENSORRT_VERSION="6.0.1.5" elif [ $SDK_NAME == "cu111" ];then - COPY_LIB_LIST="\ + CUDA_COPY_LIB_LIST="\ ${CUDA_LIB_DIR}/libnvrtc.so.11.1:\ ${CUDA_LIB_DIR}/libcublasLt.so.11:\ ${CUDA_LIB_DIR}/libcublas.so.11:\ @@ -59,7 +59,7 @@ elif [ $SDK_NAME == "cu111" ];then REQUIR_CUDNN_VERSION="8.0.4" REQUIR_TENSORRT_VERSION="7.2.2.3" elif [ $SDK_NAME == "cu112" ];then - COPY_LIB_LIST="\ + CUDA_COPY_LIB_LIST="\ ${CUDA_LIB_DIR}/libnvrtc.so.11.2:\ ${CUDA_LIB_DIR}/libcublasLt.so.11:\ ${CUDA_LIB_DIR}/libcublas.so.11:\ @@ -204,7 +204,7 @@ docker run --rm -it $TMPFS_ARGS \ -e BUILD_WHL_CPU_ONLY=${BUILD_WHL_CPU_ONLY} \ -e ALL_PYTHON="${ALL_PYTHON}" \ -e
EXTRA_CMAKE_FLAG="$EXTRA_CMAKE_FLAG" \ - -e COPY_LIB_LIST="$COPY_LIB_LIST" \ + -e CUDA_COPY_LIB_LIST="$CUDA_COPY_LIB_LIST" \ -e SDK_NAME="$SDK_NAME" \ -v ${CUDA_ROOT_DIR}:/usr/local/cuda \ -v ${CUDNN_ROOT_DIR}:/opt/cudnn \ diff --git a/scripts/whl/manylinux2014/do_build_common.sh b/scripts/whl/manylinux2014/do_build_common.sh index 3e313612..7fcf3ef1 100755 --- a/scripts/whl/manylinux2014/do_build_common.sh +++ b/scripts/whl/manylinux2014/do_build_common.sh @@ -24,36 +24,43 @@ function full_copy_so(){ fi } -function patch_elf_depend_lib() { - echo "handle common depend lib" +function handle_copy_cuda_libs() { + TO_DIR=$1 + if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then + echo "handle cuda lib to ${TO_DIR}" + cp ${BUILD_DIR}/dnn/cuda-stub/libcuda_stub.so ${TO_DIR} + handle_strip ${TO_DIR}/libcuda_stub.so + cp /usr/local/cuda/lib64/libnvToolsExt.so.1 ${TO_DIR} + IFS=: read -a lib_name_array <<<"$CUDA_COPY_LIB_LIST" + append_rpath='$ORIGIN/.' + for lib_name in ${lib_name_array[@]};do + echo "cuda copy detail: ${lib_name} to ${TO_DIR}" + full_copy_so $lib_name ${TO_DIR} $lib_append_rpath + done + fi +} + +function patch_elf_depend_lib_mgb_mge() { + echo "handle common depend lib for mgb or mge" LIBS_DIR=${BUILD_DIR}/staging/megengine/core/lib mkdir -p ${LIBS_DIR} cp /usr/lib64/libatomic.so.1 ${LIBS_DIR} patchelf --remove-rpath ${BUILD_DIR}/staging/megengine/core/_imperative_rt.so patchelf --force-rpath --set-rpath '$ORIGIN/lib' ${BUILD_DIR}/staging/megengine/core/_imperative_rt.so - cp ${BUILD_DIR}/src/libmegengine_export.so ${LIBS_DIR} - patchelf --remove-rpath ${LIBS_DIR}/libmegengine_export.so - patchelf --force-rpath --set-rpath '$ORIGIN/.' ${LIBS_DIR}/libmegengine_export.so + handle_strip ${BUILD_DIR}/staging/megengine/core/_imperative_rt.so cp ${BUILD_DIR}/src/libmegengine_export.so ${LIBS_DIR} patchelf --remove-rpath ${LIBS_DIR}/libmegengine_export.so patchelf --force-rpath --set-rpath '$ORIGIN/.' 
${LIBS_DIR}/libmegengine_export.so + handle_strip ${LIBS_DIR}/libmegengine_export.so - if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then - echo "handle cuda lib" - cp ${BUILD_DIR}/dnn/cuda-stub/libcuda_stub.so ${LIBS_DIR} - cp /usr/local/cuda/lib64/libnvToolsExt.so.1 ${LIBS_DIR} - IFS=: read -a lib_name_array <<<"$COPY_LIB_LIST" - append_rpath='$ORIGIN/.' - for lib_name in ${lib_name_array[@]};do - full_copy_so $lib_name ${LIBS_DIR} $lib_append_rpath - done - fi + # as some version of cudnn/trt libs have dlopen libs, so we can not use auditwheel + # TODO: PR for auditwheel to support args for dlopen libs + handle_copy_cuda_libs ${LIBS_DIR} } - ALL_PYTHON=${ALL_PYTHON} if [[ -z ${ALL_PYTHON} ]] then @@ -71,10 +78,9 @@ BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Rel if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then BUILD_DIR=${SRC_DIR}/build_dir/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_OFF/Release/build/ fi -NEW_LIB_PATH=core/lib + for ver in ${ALL_PYTHON} do - USE_AUDITWHEEL="ON" python_ver=${ver:0:2} MAJOR=${python_ver:0:1} MINOR=${ver:1} @@ -87,6 +93,7 @@ do export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DPYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python${MAJOR}.${MINOR}" export EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DMGE_WITH_ATLAS=ON" + # TODO: after change to Ninja, only the fisrt loop need add -r to save build time if [ ${BUILD_WHL_CPU_ONLY} = "OFF" ]; then ${SRC_DIR}/scripts/cmake-build/host_build.sh -c -t -r else @@ -99,38 +106,23 @@ do cp -a imperative/python/{megengine,setup.py,requires.txt,requires-style.txt,requires-test.txt} staging/ - - handle_strip ${BUILD_DIR}/src/libmegengine_export.so - cd ${BUILD_DIR}/staging/megengine/core - handle_strip _imperative_rt.so - mkdir -p lib/ucx - - - - if [ ${USE_AUDITWHEEL} = "OFF" ]; then - patch_elf_depend_lib - fi + patch_elf_depend_lib_mgb_mge cd ${BUILD_DIR}/staging/ ${PYTHON_DIR}/bin/python setup.py bdist_wheel cd /home/output - if [ ${USE_AUDITWHEEL} = "ON" ]; then - 
LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L ${NEW_LIB_PATH} ${BUILD_DIR}/staging/dist/Meg*.whl - else - mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME} - cd ${BUILD_DIR}/staging/dist/ - org_whl_name=`ls Meg*${ver}*.whl` - compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'` - echo "org whl name: ${org_whl_name}" - echo "comapt whl name: ${compat_whl_name}" - mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name} - cd /home/output - fi + mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME} + cd ${BUILD_DIR}/staging/dist/ + org_whl_name=`ls Meg*${ver}*.whl` + compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'` + echo "org whl name: ${org_whl_name}" + echo "comapt whl name: ${compat_whl_name}" + mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name} + cd /home/output chown -R ${UID}.${UID} . # compat for root-less docker env to remove output at host side chmod -R 777 . echo "python $ver done" done -