From 198ee0686f693d096c1f463a16d0d77fedc183b6 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Wed, 6 Jul 2022 14:26:08 +0800
Subject: [PATCH] feat(mgb/trt): update tensorRT toolchain to 8

GitOrigin-RevId: d7cbb722b84e44ea6f82b03346375b1c5bfa8ea9
---
 CMakeLists.txt                                  |  13 +-
 cmake/tensorrt.cmake                            |   2 +-
 scripts/whl/manylinux2014/build_wheel_common.sh |  39 +++++-
 src/tensorrt/impl/opr_replace.cpp               | 147 +++++++++++----------
 src/tensorrt/impl/tensorrt_opr.cpp              |  14 +-
 .../include/megbrain/tensorrt/tensorrt_opr.h    |  21 ++-
 6 files changed, 152 insertions(+), 84 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9660f9f7..fc1ab3a9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -813,10 +813,15 @@ if(MGE_WITH_CUDA)
       message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
       list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${TRT_PLUGIN_LIBRARY})
     else()
-      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
-           -Wl,--no-whole-archive)
+      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 8)
+        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer.so
+             libnvinfer_plugin.so -Wl,--no-whole-archive)
+      else()
+        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
+             -Wl,--no-whole-archive)
+      endif()
     endif()
-    if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+    if(TensorRT_VERSION_MAJOR STREQUAL 7)
       message(STATUS "handle trt myelin lib after trt7")
       list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor
            libmyelin_pattern_runtime libmyelin_pattern_library)
@@ -905,7 +910,7 @@ if(MGE_WITH_CUDA)
   else()
     if(MGE_WITH_TRT)
       list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
-      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+      if(TensorRT_VERSION_MAJOR STREQUAL 7)
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin)
      endif()
diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake
index 787907d2..2da37bb6 100644
--- a/cmake/tensorrt.cmake
+++ b/cmake/tensorrt.cmake
@@ -120,7 +120,7 @@ set_target_properties(
 message(
   STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")
 
-if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
+if(TensorRT_VERSION_MAJOR STREQUAL 7)
   if(MGE_CUDA_USE_STATIC)
     find_library(
       LIBMYELIN_COMPILER
diff --git a/scripts/whl/manylinux2014/build_wheel_common.sh b/scripts/whl/manylinux2014/build_wheel_common.sh
index a6b212e9..2b231546 100755
--- a/scripts/whl/manylinux2014/build_wheel_common.sh
+++ b/scripts/whl/manylinux2014/build_wheel_common.sh
@@ -9,9 +9,10 @@ TMPFS_ARGS="--tmpfs /tmp:exec"
 local_path=$(dirname $(readlink -f $0))
 CUDNN_LIB_DIR="/opt/cudnn/lib64/"
 CUDA_LIB_DIR="/usr/local/cuda/lib64/"
+TensorRT_LIB_DIR="/opt/tensorrt/lib/"
 
 SDK_NAME="unknown"
-x86_64_support_version="cu101 cu111 cu112 cpu"
+x86_64_support_version="cu101 cu111 cu112 cpu cu111_cudnn821_tensorRT825"
 aarch64_support_version="cu102_JetsonNano cu111 cpu"
 if [[ -z ${IN_CI} ]]
 then
@@ -86,7 +87,10 @@ elif [ $SDK_NAME == "cu102_JetsonNano" ];then
     ${CUDNN_LIB_DIR}/libcudnn_cnn_train.so.8:\
     ${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
     ${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
-    ${CUDNN_LIB_DIR}/libcudnn.so.8"
+    ${CUDNN_LIB_DIR}/libcudnn.so.8:\
+    ${TensorRT_LIB_DIR}/libnvinfer_plugin.so.8:\
+    ${TensorRT_LIB_DIR}/libnvinfer.so.8"
+
     EXTRA_CMAKE_FLAG="-DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON \
         -DMGE_CUDA_GENCODE=\"-gencode arch=compute_53,code=sm_53\" "
 
@@ -131,6 +135,37 @@ elif [ $SDK_NAME == "cu111" ];then
         -gencode arch=compute_86,code=compute_86\" "
     fi
 
+elif [ $SDK_NAME == "cu111_cudnn821_tensorRT825" ];then
+    BUILD_GCC8="ON"
+    REQUIR_CUDA_VERSION="11010"
+    REQUIR_CUDNN_VERSION="8.2.1"
+    REQUIR_TENSORRT_VERSION="8.2.5.1"
+    REQUIR_CUBLAS_VERSION="11.2.1.74"
+
+
+    CUDA_COPY_LIB_LIST="\
+        ${CUDA_LIB_DIR}/libnvrtc.so.11.1:\
+        ${CUDA_LIB_DIR}/libcublasLt.so.11:\
+        ${CUDA_LIB_DIR}/libcublas.so.11:\
+        ${CUDNN_LIB_DIR}/libcudnn_adv_infer.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn_adv_train.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn_cnn_infer.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn_cnn_train.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
+        ${CUDNN_LIB_DIR}/libcudnn.so.8:\
+        ${TensorRT_LIB_DIR}/libnvinfer_plugin.so.8:\
+        ${TensorRT_LIB_DIR}/libnvinfer.so.8"
+
+    EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON \
+        -DMGE_CUDA_GENCODE=\"-gencode arch=compute_61,code=sm_61 \
+        -gencode arch=compute_70,code=sm_70 \
+        -gencode arch=compute_75,code=sm_75 \
+        -gencode arch=compute_80,code=sm_80 \
+        -gencode arch=compute_86,code=sm_86 \
+        -gencode arch=compute_86,code=compute_86\" "
+
+
 elif [ $SDK_NAME == "cu112" ];then
     BUILD_GCC8="ON"
     CUDA_COPY_LIB_LIST="\
diff --git a/src/tensorrt/impl/opr_replace.cpp b/src/tensorrt/impl/opr_replace.cpp
index 0b73abe0..d52e0afa 100644
--- a/src/tensorrt/impl/opr_replace.cpp
+++ b/src/tensorrt/impl/opr_replace.cpp
@@ -25,7 +25,6 @@ using namespace cg;
 
 template <typename T>
 using TensorRTUniquePtr = opr::intl::TensorRTUniquePtr<T>;
-
 namespace {
 nvinfer1::DataType mgb_dtype_to_trt_dtype(DType dtype) {
     switch (dtype.enumv()) {
@@ -125,7 +124,8 @@ class TensorRTReplacePass::Impl final {
     // True if var is encountered for the first time.
     bool check_input(
             VarNode* var, OperatorNodeBase* opr,
-            mgb::SmallVector<nvinfer1::DimensionType> dimtypes = {});
+            mgb::SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)>
+                    dimtypes = {});
     HostTensorND get_value(VarNode* var, ConvFormat format = ConvFormat::NCHW);
     void set_itensor_dynamic_range(VarNode* var, OperatorNodeBase* opr);
     float get_scale(DType data_type);
@@ -652,9 +652,11 @@ public:
         using Mode = opr::Elemwise::Mode;
         auto mode = opr->cast_final_safe<opr::Elemwise>().param().mode;
         auto get_dimtype = [&](int ndim) {
-            SmallVector<nvinfer1::DimensionType> dimtypes(ndim);
+            SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)>
+                    dimtypes(ndim);
             for (int i = 0; i < ndim; i++) {
-                dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
+                dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                        nvinfer1::DimensionType::kSPATIAL);
             }
             return dimtypes;
         };
@@ -839,81 +841,86 @@ public:
             set_itensor_dynamic_range(opr->output(0), opr);
         };
 
-        m_opr_trait[opr::ElemwiseMultiType::typeinfo()].add_to_nvinfer =
-                [this](nvinfer1::INetworkDefinition* net, OperatorNodeBase* opr) {
-                    auto&& varnode2itensor =
-                            m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
-                    size_t ndim0 = opr->input(0)->shape().ndim,
-                           ndim1 = opr->input(1)->shape().ndim;
-                    mgb_assert(ndim0 == ndim1);
-                    size_t tensor_ndim = ndim0;
-                    using Mode = opr::ElemwiseMultiType::Mode;
-                    SmallVector<nvinfer1::DimensionType> dimtypes(tensor_ndim);
-                    for (size_t i = 0; i < tensor_ndim; i++) {
-                        dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
-                    }
-                    auto mode =
-                            opr->cast_final_safe<opr::ElemwiseMultiType>().param().mode;
-                    mgb_assert(
-                            mode == Mode::QADD || mode == Mode::QFUSE_ADD_RELU,
-                            "Only QADD and QFUSE_ADD_RELU are supported on CUDA.");
-                    mgb_assert(
-                            opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8,
-                            "output data type %s is not supported",
-                            opr->output(0)->dtype().name());
-                    check_input(opr->input(0), opr, dimtypes);
-                    check_input(opr->input(1), opr, dimtypes);
-                    auto dims0 = varnode2itensor[opr->input(0)]->getDimensions(),
-                         dims1 = varnode2itensor[opr->input(1)]->getDimensions();
-                    mgb_throw_if(
-                            dims0.nbDims != dims1.nbDims, AssertionError,
- "Input dimensions of two input tensors must be " - "equal (got: %d, %d).", - dims0.nbDims, dims1.nbDims); - auto elem = net->addElementWise( - *varnode2itensor[opr->input(0)], - *varnode2itensor[opr->input(1)], - nvinfer1::ElementWiseOperation::kSUM); - mgb_assert(elem, "construct network failed"); - std::string layer_name = "TRT_ELEM:" + opr->name(); - elem->setName(layer_name.c_str()); - std::string output_name = "TRT_O:" + opr->output()[0]->name(); - elem->getOutput(0)->setName(output_name.c_str()); - varnode2itensor[opr->output(0)] = elem->getOutput(0); - set_itensor_dynamic_range(opr->output(0), opr); - if (mode == Mode::QFUSE_ADD_RELU) { - auto act = net->addActivation( - *varnode2itensor[opr->output(0)], - nvinfer1::ActivationType::kRELU); - mgb_assert(act, "construct network failed"); - std::string layer_name = "TRT_ACTV:" + opr->name(); - act->setName(layer_name.c_str()); - std::string output_name = - "TRT_O:" + opr->output()[0]->name() + "_act"; - act->getOutput(0)->setName(output_name.c_str()); - varnode2itensor[opr->output(0)] = act->getOutput(0); - set_itensor_dynamic_range(opr->output(0), opr); - } - }; + m_opr_trait[opr::ElemwiseMultiType::typeinfo()] + .add_to_nvinfer = [this](nvinfer1::INetworkDefinition* net, + OperatorNodeBase* opr) { + auto&& varnode2itensor = + m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor; + size_t ndim0 = opr->input(0)->shape().ndim, + ndim1 = opr->input(1)->shape().ndim; + mgb_assert(ndim0 == ndim1); + size_t tensor_ndim = ndim0; + using Mode = opr::ElemwiseMultiType::Mode; + SmallVector dimtypes( + tensor_ndim); + for (size_t i = 0; i < tensor_ndim; i++) { + dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE( + nvinfer1::DimensionType::kSPATIAL); + } + auto mode = opr->cast_final_safe().param().mode; + mgb_assert( + mode == Mode::QADD || mode == Mode::QFUSE_ADD_RELU, + "Only QADD and QFUSE_ADD_RELU are supported on CUDA."); + mgb_assert( + opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8, + "output data type %s is not supported", + opr->output(0)->dtype().name()); + check_input(opr->input(0), opr, dimtypes); + check_input(opr->input(1), opr, dimtypes); + auto dims0 = varnode2itensor[opr->input(0)]->getDimensions(), + dims1 = varnode2itensor[opr->input(1)]->getDimensions(); + mgb_throw_if( + dims0.nbDims != dims1.nbDims, AssertionError, + "Input dimensions of two input tensors must be " + "equal (got: %d, %d).", + dims0.nbDims, dims1.nbDims); + auto elem = net->addElementWise( + *varnode2itensor[opr->input(0)], *varnode2itensor[opr->input(1)], + nvinfer1::ElementWiseOperation::kSUM); + mgb_assert(elem, "construct network failed"); + std::string layer_name = "TRT_ELEM:" + opr->name(); + elem->setName(layer_name.c_str()); + std::string output_name = "TRT_O:" + opr->output()[0]->name(); + elem->getOutput(0)->setName(output_name.c_str()); + varnode2itensor[opr->output(0)] = elem->getOutput(0); + set_itensor_dynamic_range(opr->output(0), opr); + if (mode == Mode::QFUSE_ADD_RELU) { + auto act = net->addActivation( + *varnode2itensor[opr->output(0)], + nvinfer1::ActivationType::kRELU); + mgb_assert(act, "construct network failed"); + std::string layer_name = "TRT_ACTV:" + opr->name(); + act->setName(layer_name.c_str()); + std::string output_name = "TRT_O:" + opr->output()[0]->name() + "_act"; + act->getOutput(0)->setName(output_name.c_str()); + varnode2itensor[opr->output(0)] = act->getOutput(0); + set_itensor_dynamic_range(opr->output(0), opr); + } + }; auto replace_matmul_opr = [this](nvinfer1::INetworkDefinition* net, OperatorNodeBase* 
                                          OperatorNodeBase* opr) {
             auto&& varnode2itensor =
                     m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
-            SmallVector<nvinfer1::DimensionType> dimtypes;
+            SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes;
             bool transposeA = false, transposeB = false;
             if (opr->same_type<opr::MatrixMul>()) {
                 dimtypes = {
-                        nvinfer1::DimensionType::kSPATIAL,
-                        nvinfer1::DimensionType::kSPATIAL};
+                        TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                                nvinfer1::DimensionType::kSPATIAL),
+                        TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                                nvinfer1::DimensionType::kSPATIAL)};
                 transposeA = opr->cast_final_safe<opr::MatrixMul>().param().transposeA;
                 transposeB = opr->cast_final_safe<opr::MatrixMul>().param().transposeB;
             } else {
                 mgb_assert(opr->same_type<opr::BatchedMatrixMul>());
                 dimtypes = {
-                        nvinfer1::DimensionType::kINDEX,
-                        nvinfer1::DimensionType::kSPATIAL,
-                        nvinfer1::DimensionType::kSPATIAL};
+                        TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                                nvinfer1::DimensionType::kINDEX),
+                        TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                                nvinfer1::DimensionType::kSPATIAL),
+                        TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                                nvinfer1::DimensionType::kSPATIAL)};
                 transposeA = opr->cast_final_safe<opr::BatchedMatrixMul>()
                                      .param()
                                      .transposeA;
@@ -957,9 +964,11 @@ public:
             auto&& varnode2itensor =
                     m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
             size_t tensor_ndim = opr->input(0)->shape().ndim;
-            SmallVector<nvinfer1::DimensionType> dimtypes(tensor_ndim);
+            SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes(
+                    tensor_ndim);
             for (size_t i = 0; i < tensor_ndim; i++) {
-                dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
+                dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(
+                        nvinfer1::DimensionType::kSPATIAL);
             }
             check_input(opr->input(0), opr, dimtypes);
             auto host_one = HostTensorND(
@@ -1094,7 +1103,7 @@ VarNodeArray TensorRTReplacePass::Impl::find_parent_conv(OperatorNodeBase* inp_o
 
 bool TensorRTReplacePass::Impl::check_input(
         VarNode* var, OperatorNodeBase* opr,
-        SmallVector<nvinfer1::DimensionType> dimtypes) {
+        SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes) {
     auto trt_graph = m_tensorrt_graphs[m_graph_map[opr] - 1];
     auto&& varnode2itensor = trt_graph->varnode2itensor;
     auto iter = trt_graph->inputs.find(var);
diff --git a/src/tensorrt/impl/tensorrt_opr.cpp b/src/tensorrt/impl/tensorrt_opr.cpp
index 033b58a7..0fd22e04 100644
--- a/src/tensorrt/impl/tensorrt_opr.cpp
+++ b/src/tensorrt/impl/tensorrt_opr.cpp
@@ -21,12 +21,14 @@ public:
     typedef std::pair<std::string, float> Record;
     std::vector<Record> profile;
 
-    void reportLayerTime(const char* layerName, float ms) override;
+    void reportLayerTime(const char* layerName, float ms)
+            TENSORRT_NO_EXCEPT(noexcept) override;
     void print_layer_times();
     std::shared_ptr<json::Value> to_json();
 };
 
-void TensorRTProfiler::reportLayerTime(const char* layerName, float ms) {
+void TensorRTProfiler::reportLayerTime(const char* layerName, float ms)
+        TENSORRT_NO_EXCEPT(noexcept) {
     profile.push_back(std::make_pair(layerName, ms));
 }
 
@@ -45,7 +47,8 @@ void TensorRTProfiler::print_layer_times() {
 
 /* ========================== Logger ========================== */
 
-void TensorRTOpr::Logger::log(nvinfer1::ILogger::Severity severity, const char* msg) {
+void TensorRTOpr::Logger::log(nvinfer1::ILogger::Severity severity, const char* msg)
+        TENSORRT_NO_EXCEPT(noexcept) {
     switch (severity) {
         case Severity::kINTERNAL_ERROR:
             mgb_log("TRT_INTERNAL_ERROR: %s", msg);
@@ -112,7 +115,8 @@ TensorRTOpr::GpuAllocator::~GpuAllocator() noexcept {
 }
 
 void* TensorRTOpr::GpuAllocator::allocate(
-        uint64_t size, uint64_t alignment, uint32_t flags) {
+        uint64_t size, uint64_t alignment, uint32_t flags)
+        TENSORRT_NO_EXCEPT(noexcept) {
     static bool enable_log = getenv("MGB_LOG_TRT_MEM_ALLOC");
     mgb_assert(
             !flags && !(alignment & (alignment - 1)), "flags=%u alignment=%" PRIu64,
@@ -132,7 +136,7 @@ void* TensorRTOpr::GpuAllocator::allocate(
     return ret;
 }
 
-void TensorRTOpr::GpuAllocator::free(void* memory) {
+void TensorRTOpr::GpuAllocator::free(void* memory) TENSORRT_NO_EXCEPT(noexcept) {
     {
         auto iter = m_ptr2size.find(memory);
         mgb_assert(iter != m_ptr2size.end(), "ptr %p not found", memory);
diff --git a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
index 4b7863fc..9fd2ec36 100644
--- a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
+++ b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
@@ -15,6 +15,19 @@
         ((NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + \
          NV_TENSORRT_PATCH)  // major, minor, patch
 
+// some APIs have changed in TensorRT 8
+#if (NV_TENSOR_RT_VERSION >= 8001)
+enum class Empty : int32_t {};
+#define TENSORRT_NO_DIMENSIONTYPE(api) Empty
+#define TENSORRT_NO_DIMENSIONTYPE_VALUE(api) \
+    {}
+#define TENSORRT_NO_EXCEPT(api) api
+#else
+#define TENSORRT_NO_DIMENSIONTYPE(api) api
+#define TENSORRT_NO_DIMENSIONTYPE_VALUE(api) api
+#define TENSORRT_NO_EXCEPT(api)
+#endif
+
 namespace mgb {
 namespace opr {
 
@@ -171,7 +184,8 @@ class TensorRTOpr::Logger final : public nvinfer1::ILogger, NonCopyableObj {
     Logger();
 
 public:
-    void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
+    void log(nvinfer1::ILogger::Severity severity, const char* msg)
+            TENSORRT_NO_EXCEPT(noexcept) override;
     static Logger& instance();
 };
 
@@ -184,8 +198,9 @@ public:
     explicit GpuAllocator(CompNode cn);
     ~GpuAllocator() noexcept;
 
-    void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override;
-    void free(void* memory) override;
+    void* allocate(uint64_t size, uint64_t alignment, uint32_t flags)
+            TENSORRT_NO_EXCEPT(noexcept) override;
+    void free(void* memory) TENSORRT_NO_EXCEPT(noexcept) override;
     CompNode comp_node() const { return m_cn; }
 };
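
The compatibility macros added to tensorrt_opr.h above carry the whole TensorRT 7 to 8 migration: TensorRT 8 removed nvinfer1::DimensionType and marked several ILogger/IGpuAllocator virtuals noexcept, and every call site in the patch routes both differences through TENSORRT_NO_DIMENSIONTYPE, TENSORRT_NO_DIMENSIONTYPE_VALUE and TENSORRT_NO_EXCEPT. Below is a minimal usage sketch, not part of the patch; ExampleLogger and fill_spatial_dimtypes are hypothetical names, and it assumes mgb::SmallVector and the nvinfer1 headers are reachable through megbrain/tensorrt/tensorrt_opr.h.

// Illustrative only: one source file compiling against TensorRT 7 and 8.
#include <cstddef>
#include <cstdio>
#include "megbrain/tensorrt/tensorrt_opr.h"

// TENSORRT_NO_EXCEPT(noexcept) expands to `noexcept` when NV_TENSOR_RT_VERSION >= 8001
// (TensorRT 8.0.1 and later) and to nothing on older versions, so the override matches
// ILogger::log on either side of the API change.
class ExampleLogger final : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) TENSORRT_NO_EXCEPT(noexcept) override {
        if (severity == Severity::kERROR || severity == Severity::kINTERNAL_ERROR) {
            std::fprintf(stderr, "TRT error: %s\n", msg);
        }
    }
};

// On TensorRT 8 the element type collapses to the placeholder `Empty` enum and the
// kSPATIAL tag becomes a value-initializing `{}`; on TensorRT 7 both macros are no-ops
// and the vector keeps its original nvinfer1::DimensionType elements.
inline void fill_spatial_dimtypes(std::size_t ndim) {
    mgb::SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes(ndim);
    for (std::size_t i = 0; i < ndim; ++i) {
        dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(nvinfer1::DimensionType::kSPATIAL);
    }
    (void)dimtypes;  // the replace pass would forward this to check_input()
}

Keeping the version split inside these three macros lets the graph-rewrite pass and the operator code stay free of per-version #if blocks.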