diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 1131d90b..49504105 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -216,7 +216,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_QS8) {
     }
 }
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST_F(CUDA, CONV_BIAS_NCHW_QS8) {
     //! not support NonlineMode::SIGMOID and NonlineMode::H_SWISH
     require_compute_capability(6, 1);
diff --git a/imperative/CMakeLists.txt b/imperative/CMakeLists.txt
index 2bc3b5f6..46d2e133 100644
--- a/imperative/CMakeLists.txt
+++ b/imperative/CMakeLists.txt
@@ -63,6 +63,7 @@ add_custom_command(
     TARGET ${MODULE_NAME} POST_BUILD
     COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_SOURCE_DIR}/LICENSE ${PROJECT_SOURCE_DIR}/ACKNOWLEDGMENTS ${PROJECT_BINARY_DIR}
     COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/core/$ # clean develop
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine/version.py # clean develop
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/megengine ${CMAKE_CURRENT_BINARY_DIR}/python/megengine
     COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/python/test ${CMAKE_CURRENT_BINARY_DIR}/python/test
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/python/setup.py ${CMAKE_CURRENT_BINARY_DIR}/python/setup.py
diff --git a/scripts/whl/BUILD_PYTHON_WHL_README.md b/scripts/whl/BUILD_PYTHON_WHL_README.md
index 7143e97d..fe883abd 100755
--- a/scripts/whl/BUILD_PYTHON_WHL_README.md
+++ b/scripts/whl/BUILD_PYTHON_WHL_README.md
@@ -74,7 +74,7 @@ ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101
 * If you just want to build with cpu only version, you can set `BUILD_WHL_CPU_ONLY` environment 'ON'.
 eg:
 ```bash
-BUILD_WHL_CPU_ONLY="ON" ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cu101
+ALL_PYTHON="36m" ./scripts/whl/manylinux2014/build_wheel_common.sh -sdk cpu
 ```
 
 ## Build for MacOS
diff --git a/scripts/whl/manylinux2014/build_wheel_common.sh b/scripts/whl/manylinux2014/build_wheel_common.sh
index 2ca5e51d..8b7d29a2 100755
--- a/scripts/whl/manylinux2014/build_wheel_common.sh
+++ b/scripts/whl/manylinux2014/build_wheel_common.sh
@@ -9,16 +9,16 @@ local_path=$(dirname $(readlink -f $0))
 CUDNN_LIB_DIR="/opt/cudnn/lib64/"
 CUDA_LIB_DIR="/usr/local/cuda/lib64/"
 
-CUDA_SDK="unknown"
+SDK_NAME="unknown"
 function usage() {
-    echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112"
+    echo "use '-sdk cu111' to specify cuda toolkit config, also support cu101, cu112, cpu"
 }
 
 while [ "$1" != "" ]; do
     case $1 in
         -sdk)
             shift
-            CUDA_SDK=$1
+            SDK_NAME=$1
             shift
             ;;
         *)
@@ -27,17 +27,16 @@ while [ "$1" != "" ]; do
     esac
 done
 
-echo "Build with ${CUDA_SDK}"
+echo "Build with ${SDK_NAME}"
 
-if [ $CUDA_SDK == "cu101" ];then
+if [ $SDK_NAME == "cu101" ];then
    COPY_LIB_LIST="${CUDA_LIB_DIR}/libnvrtc.so.10.1"
    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=OFF"
-   OUT_DIR="cu101"
    BUILD_GCC8="ON"
    REQUIR_CUDA_VERSION="10010"
    REQUIR_CUDNN_VERSION="7.6.3"
    REQUIR_TENSORRT_VERSION="6.0.1.5"
-elif [ $CUDA_SDK == "cu111" ];then
+elif [ $SDK_NAME == "cu111" ];then
    COPY_LIB_LIST="\
        ${CUDA_LIB_DIR}/libnvrtc.so.11.1:\
        ${CUDA_LIB_DIR}/libcublasLt.so.11:\
@@ -56,11 +55,10 @@ elif [ $CUDA_SDK == "cu111" ];then
        arch=compute_80,code=sm_80 \
        arch=compute_86,code=sm_86 \
        arch=compute_86,code=compute_86"
-   OUT_DIR="cu111"
    REQUIR_CUDA_VERSION="11010"
-   REQUIR_CUDNN_VERSION="8.0.5"
+   REQUIR_CUDNN_VERSION="8.0.4"
    REQUIR_TENSORRT_VERSION="7.2.2.3"
-elif [ $CUDA_SDK == "cu112" ];then
+elif [ $SDK_NAME == "cu112" ];then
    COPY_LIB_LIST="\
        ${CUDA_LIB_DIR}/libnvrtc.so.11.2:\
        ${CUDA_LIB_DIR}/libcublasLt.so.11:\
@@ -79,16 +77,17 @@ elif [ $CUDA_SDK == "cu112" ];then
        arch=compute_80,code=sm_80 \
        arch=compute_86,code=sm_86 \
        arch=compute_86,code=compute_86"
-   OUT_DIR="cu112"
    REQUIR_CUDA_VERSION="11020"
-   REQUIR_CUDNN_VERSION="8.0.5"
+   REQUIR_CUDNN_VERSION="8.0.4"
    REQUIR_TENSORRT_VERSION="7.2.2.3"
+elif [ $SDK_NAME == "cpu" ];then
+   echo "use $SDK_NAME without cuda support"
+   BUILD_WHL_CPU_ONLY="ON"
 else
-   echo "no support sdk ${CUDA_SDK}, please set by '-sdk cu111'"
+   echo "no support sdk ${SDK_NAME}, please set by '-sdk cu111'"
    exit -1
 fi
 
-BUILD_WHL_CPU_ONLY=${BUILD_WHL_CPU_ONLY}
 if [[ -z ${BUILD_WHL_CPU_ONLY} ]]
 then
     BUILD_WHL_CPU_ONLY="OFF"
@@ -205,7 +204,7 @@ docker run --rm -it $TMPFS_ARGS \
    -e ALL_PYTHON="${ALL_PYTHON}" \
    -e EXTRA_CMAKE_FLAG="$EXTRA_CMAKE_FLAG" \
    -e COPY_LIB_LIST="$COPY_LIB_LIST" \
-   -e OUT_DIR="$OUT_DIR" \
+   -e SDK_NAME="$SDK_NAME" \
    -v ${CUDA_ROOT_DIR}:/usr/local/cuda \
    -v ${CUDNN_ROOT_DIR}:/opt/cudnn \
    -v ${TENSORRT_ROOT_DIR}:/opt/tensorrt \
diff --git a/scripts/whl/manylinux2014/do_build_common.sh b/scripts/whl/manylinux2014/do_build_common.sh
index d7fdbd0c..3e313612 100755
--- a/scripts/whl/manylinux2014/do_build_common.sh
+++ b/scripts/whl/manylinux2014/do_build_common.sh
@@ -119,13 +119,13 @@ do
    if [ ${USE_AUDITWHEEL} = "ON" ]; then
        LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L ${NEW_LIB_PATH} ${BUILD_DIR}/staging/dist/Meg*.whl
    else
-       mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR}
+       mkdir -p ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}
        cd ${BUILD_DIR}/staging/dist/
        org_whl_name=`ls Meg*${ver}*.whl`
        compat_whl_name=`echo ${org_whl_name} | sed 's/linux/manylinux2014/'`
        echo "org whl name: ${org_whl_name}"
        echo "comapt whl name: ${compat_whl_name}"
-       mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${OUT_DIR}/${compat_whl_name}
+       mv ${org_whl_name} ${SRC_DIR}/scripts/whl/manylinux2014/output/wheelhouse/${SDK_NAME}/${compat_whl_name}
        cd /home/output
    fi
    chown -R ${UID}.${UID} .
diff --git a/src/gopt/test/inference.cpp b/src/gopt/test/inference.cpp
index 67b9498b..b49319fc 100644
--- a/src/gopt/test/inference.cpp
+++ b/src/gopt/test/inference.cpp
@@ -1836,7 +1836,7 @@ TEST(TestEnableTensorCore, SmallInputShape) {
 }
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Nchw4Nchw) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -1936,7 +1936,7 @@ TEST(TestEnableTensorCore, Nchw4Nchw) {
 #endif
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ConvBiasWithZ) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2002,7 +2002,7 @@ TEST(TestEnableTensorCore, ConvBiasWithZ) {
 #endif
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, Pooling) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2148,7 +2148,7 @@ TEST(TestGoptInference, EnableTensorCore) {
 }
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(FuseConvBiasZPass, BlockFuse) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2273,7 +2273,7 @@ TEST(FuseConvBiasZPass, BlockFuse) {
 #endif
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestEnableTensorCore, ShuffleMerge) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2450,7 +2450,7 @@ TEST(FuseConvBiasZPass, Basic) {
 
 #if MGB_CUDA
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2552,7 +2552,7 @@ TEST(TestGoptInference, EnableCHWN4) {
 #endif
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4WarpPespective) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2731,7 +2731,7 @@ TEST(TestGoptInference, EnableCHWN4Pooling) {
 }
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -2857,7 +2857,7 @@ TEST(TestGoptInference, EnableCHWN4ShuffleRemove) {
 #endif
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, ConvertFormatNCHW4GPU) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -3076,7 +3076,7 @@ TEST(TestGoptInference, ConvertFormatNCHW4) {
 }
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, ConvertFormatNCHW4Ic3) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
@@ -3945,7 +3945,7 @@ TEST(TestGoptInference, FoldingConvDimshuffle) {
 }
 
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestGoptInference, FoldingConvDimshuffleNCHW4NCHW32) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");
diff --git a/src/tensorrt/test/opr_replace.cpp b/src/tensorrt/test/opr_replace.cpp
index f62d6383..2635a482 100644
--- a/src/tensorrt/test/opr_replace.cpp
+++ b/src/tensorrt/test/opr_replace.cpp
@@ -1978,7 +1978,7 @@ TEST(TestTensorRTReplace, FuseConvAdd) {
     MGB_ASSERT_TENSOR_NEAR(outputs[1], outputs[3], 1e-3);
 }
 //! close for cu111 ci, reopen it when bug fixed
-#if 0
+#if CUDA_VERSION < 11000
 TEST(TestTensorRTReplace, FuseConvAddNchw2nchw4) {
     REQUIRE_GPU(1);
     auto cn = CompNode::load("gpu0");