Browse Source

feat(mgb/trt): update tensorRT toolchain to 8

GitOrigin-RevId: d7cbb722b8
HuaHua404-patch-4
Megvii Engine Team 2 years ago
parent
commit
198ee0686f
6 changed files with 152 additions and 84 deletions
  1. +9
    -4
      CMakeLists.txt
  2. +1
    -1
      cmake/tensorrt.cmake
  3. +37
    -2
      scripts/whl/manylinux2014/build_wheel_common.sh
  4. +78
    -69
      src/tensorrt/impl/opr_replace.cpp
  5. +9
    -5
      src/tensorrt/impl/tensorrt_opr.cpp
  6. +18
    -3
      src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h

+ 9
- 4
CMakeLists.txt View File

@@ -813,10 +813,15 @@ if(MGE_WITH_CUDA)
message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${TRT_PLUGIN_LIBRARY})
else()
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
-Wl,--no-whole-archive)
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 8)
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer.so
libnvinfer_plugin.so -Wl,--no-whole-archive)
else()
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
-Wl,--no-whole-archive)
endif()
endif()
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
if(TensorRT_VERSION_MAJOR STREQUAL 7)
message(STATUS "handle trt myelin lib after trt7")
list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor
libmyelin_pattern_runtime libmyelin_pattern_library)
@@ -905,7 +910,7 @@ if(MGE_WITH_CUDA)
else()
if(MGE_WITH_TRT)
list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
if(TensorRT_VERSION_MAJOR STREQUAL 7)
message(STATUS "handle trt myelin lib after trt7")
list(APPEND MGE_CUDA_LIBS libmyelin)
endif()


+ 1
- 1
cmake/tensorrt.cmake View File

@@ -120,7 +120,7 @@ set_target_properties(
message(
STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")

if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
if(TensorRT_VERSION_MAJOR STREQUAL 7)
if(MGE_CUDA_USE_STATIC)
find_library(
LIBMYELIN_COMPILER


+ 37
- 2
scripts/whl/manylinux2014/build_wheel_common.sh View File

@@ -9,9 +9,10 @@ TMPFS_ARGS="--tmpfs /tmp:exec"
local_path=$(dirname $(readlink -f $0))
CUDNN_LIB_DIR="/opt/cudnn/lib64/"
CUDA_LIB_DIR="/usr/local/cuda/lib64/"
TensorRT_LIB_DIR="/opt/tensorrt/lib/"

SDK_NAME="unknown"
x86_64_support_version="cu101 cu111 cu112 cpu"
x86_64_support_version="cu101 cu111 cu112 cpu cu111_cudnn821_tensorRT825"
aarch64_support_version="cu102_JetsonNano cu111 cpu"
if [[ -z ${IN_CI} ]]
then
@@ -86,7 +87,10 @@ elif [ $SDK_NAME == "cu102_JetsonNano" ];then
${CUDNN_LIB_DIR}/libcudnn_cnn_train.so.8:\
${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
${CUDNN_LIB_DIR}/libcudnn.so.8"
${CUDNN_LIB_DIR}/libcudnn.so.8:\
${TensorRT_LIB_DIR}/libnvinfer_plugin.so.8:\
${TensorRT_LIB_DIR}/libnvinfer.so.8"


EXTRA_CMAKE_FLAG="-DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_53,code=sm_53\" "

@@ -131,6 +135,37 @@ elif [ $SDK_NAME == "cu111" ];then
-gencode arch=compute_86,code=compute_86\" "
fi

elif [ $SDK_NAME == "cu111_cudnn821_tensorRT825" ];then
BUILD_GCC8="ON"
REQUIR_CUDA_VERSION="11010"
REQUIR_CUDNN_VERSION="8.2.1"
REQUIR_TENSORRT_VERSION="8.2.5.1"
REQUIR_CUBLAS_VERSION="11.2.1.74"

CUDA_COPY_LIB_LIST="\
${CUDA_LIB_DIR}/libnvrtc.so.11.1:\
${CUDA_LIB_DIR}/libcublasLt.so.11:\
${CUDA_LIB_DIR}/libcublas.so.11:\
${CUDNN_LIB_DIR}/libcudnn_adv_infer.so.8:\
${CUDNN_LIB_DIR}/libcudnn_adv_train.so.8:\
${CUDNN_LIB_DIR}/libcudnn_cnn_infer.so.8:\
${CUDNN_LIB_DIR}/libcudnn_cnn_train.so.8:\
${CUDNN_LIB_DIR}/libcudnn_ops_infer.so.8:\
${CUDNN_LIB_DIR}/libcudnn_ops_train.so.8:\
${CUDNN_LIB_DIR}/libcudnn.so.8:\
${TensorRT_LIB_DIR}/libnvinfer_plugin.so.8:\
${TensorRT_LIB_DIR}/libnvinfer.so.8"

EXTRA_CMAKE_FLAG=" -DMGE_WITH_CUDNN_SHARED=ON -DMGE_WITH_CUBLAS_SHARED=ON \
-DMGE_CUDA_GENCODE=\"-gencode arch=compute_61,code=sm_61 \
-gencode arch=compute_70,code=sm_70 \
-gencode arch=compute_75,code=sm_75 \
-gencode arch=compute_80,code=sm_80 \
-gencode arch=compute_86,code=sm_86 \
-gencode arch=compute_86,code=compute_86\" "


elif [ $SDK_NAME == "cu112" ];then
BUILD_GCC8="ON"
CUDA_COPY_LIB_LIST="\


+ 78
- 69
src/tensorrt/impl/opr_replace.cpp View File

@@ -25,7 +25,6 @@ using namespace cg;

template <typename T>
using TensorRTUniquePtr = opr::intl::TensorRTUniquePtr<T>;

namespace {
nvinfer1::DataType mgb_dtype_to_trt_dtype(DType dtype) {
switch (dtype.enumv()) {
@@ -125,7 +124,8 @@ class TensorRTReplacePass::Impl final {
// True if var is encountered for the first time.
bool check_input(
VarNode* var, OperatorNodeBase* opr,
mgb::SmallVector<nvinfer1::DimensionType> dimtypes = {});
mgb::SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)>
dimtypes = {});
HostTensorND get_value(VarNode* var, ConvFormat format = ConvFormat::NCHW);
void set_itensor_dynamic_range(VarNode* var, OperatorNodeBase* opr);
float get_scale(DType data_type);
@@ -652,9 +652,11 @@ public:
using Mode = opr::Elemwise::Mode;
auto mode = opr->cast_final_safe<opr::Elemwise>().param().mode;
auto get_dimtype = [&](int ndim) {
SmallVector<nvinfer1::DimensionType> dimtypes(ndim);
SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)>
dimtypes(ndim);
for (int i = 0; i < ndim; i++) {
dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL);
}
return dimtypes;
};
@@ -839,81 +841,86 @@ public:
set_itensor_dynamic_range(opr->output(0), opr);
};

m_opr_trait[opr::ElemwiseMultiType::typeinfo()].add_to_nvinfer =
[this](nvinfer1::INetworkDefinition* net, OperatorNodeBase* opr) {
auto&& varnode2itensor =
m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
size_t ndim0 = opr->input(0)->shape().ndim,
ndim1 = opr->input(1)->shape().ndim;
mgb_assert(ndim0 == ndim1);
size_t tensor_ndim = ndim0;
using Mode = opr::ElemwiseMultiType::Mode;
SmallVector<nvinfer1::DimensionType> dimtypes(tensor_ndim);
for (size_t i = 0; i < tensor_ndim; i++) {
dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
}
auto mode =
opr->cast_final_safe<opr::ElemwiseMultiType>().param().mode;
mgb_assert(
mode == Mode::QADD || mode == Mode::QFUSE_ADD_RELU,
"Only QADD and QFUSE_ADD_RELU are supported on CUDA.");
mgb_assert(
opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8,
"output data type %s is not supported",
opr->output(0)->dtype().name());
check_input(opr->input(0), opr, dimtypes);
check_input(opr->input(1), opr, dimtypes);
auto dims0 = varnode2itensor[opr->input(0)]->getDimensions(),
dims1 = varnode2itensor[opr->input(1)]->getDimensions();
mgb_throw_if(
dims0.nbDims != dims1.nbDims, AssertionError,
"Input dimensions of two input tensors must be "
"equal (got: %d, %d).",
dims0.nbDims, dims1.nbDims);
auto elem = net->addElementWise(
*varnode2itensor[opr->input(0)],
*varnode2itensor[opr->input(1)],
nvinfer1::ElementWiseOperation::kSUM);
mgb_assert(elem, "construct network failed");
std::string layer_name = "TRT_ELEM:" + opr->name();
elem->setName(layer_name.c_str());
std::string output_name = "TRT_O:" + opr->output()[0]->name();
elem->getOutput(0)->setName(output_name.c_str());
varnode2itensor[opr->output(0)] = elem->getOutput(0);
set_itensor_dynamic_range(opr->output(0), opr);
if (mode == Mode::QFUSE_ADD_RELU) {
auto act = net->addActivation(
*varnode2itensor[opr->output(0)],
nvinfer1::ActivationType::kRELU);
mgb_assert(act, "construct network failed");
std::string layer_name = "TRT_ACTV:" + opr->name();
act->setName(layer_name.c_str());
std::string output_name =
"TRT_O:" + opr->output()[0]->name() + "_act";
act->getOutput(0)->setName(output_name.c_str());
varnode2itensor[opr->output(0)] = act->getOutput(0);
set_itensor_dynamic_range(opr->output(0), opr);
}
};
m_opr_trait[opr::ElemwiseMultiType::typeinfo()]
.add_to_nvinfer = [this](nvinfer1::INetworkDefinition* net,
OperatorNodeBase* opr) {
auto&& varnode2itensor =
m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
size_t ndim0 = opr->input(0)->shape().ndim,
ndim1 = opr->input(1)->shape().ndim;
mgb_assert(ndim0 == ndim1);
size_t tensor_ndim = ndim0;
using Mode = opr::ElemwiseMultiType::Mode;
SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes(
tensor_ndim);
for (size_t i = 0; i < tensor_ndim; i++) {
dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL);
}
auto mode = opr->cast_final_safe<opr::ElemwiseMultiType>().param().mode;
mgb_assert(
mode == Mode::QADD || mode == Mode::QFUSE_ADD_RELU,
"Only QADD and QFUSE_ADD_RELU are supported on CUDA.");
mgb_assert(
opr->output(0)->dtype().enumv() == DTypeEnum::QuantizedS8,
"output data type %s is not supported",
opr->output(0)->dtype().name());
check_input(opr->input(0), opr, dimtypes);
check_input(opr->input(1), opr, dimtypes);
auto dims0 = varnode2itensor[opr->input(0)]->getDimensions(),
dims1 = varnode2itensor[opr->input(1)]->getDimensions();
mgb_throw_if(
dims0.nbDims != dims1.nbDims, AssertionError,
"Input dimensions of two input tensors must be "
"equal (got: %d, %d).",
dims0.nbDims, dims1.nbDims);
auto elem = net->addElementWise(
*varnode2itensor[opr->input(0)], *varnode2itensor[opr->input(1)],
nvinfer1::ElementWiseOperation::kSUM);
mgb_assert(elem, "construct network failed");
std::string layer_name = "TRT_ELEM:" + opr->name();
elem->setName(layer_name.c_str());
std::string output_name = "TRT_O:" + opr->output()[0]->name();
elem->getOutput(0)->setName(output_name.c_str());
varnode2itensor[opr->output(0)] = elem->getOutput(0);
set_itensor_dynamic_range(opr->output(0), opr);
if (mode == Mode::QFUSE_ADD_RELU) {
auto act = net->addActivation(
*varnode2itensor[opr->output(0)],
nvinfer1::ActivationType::kRELU);
mgb_assert(act, "construct network failed");
std::string layer_name = "TRT_ACTV:" + opr->name();
act->setName(layer_name.c_str());
std::string output_name = "TRT_O:" + opr->output()[0]->name() + "_act";
act->getOutput(0)->setName(output_name.c_str());
varnode2itensor[opr->output(0)] = act->getOutput(0);
set_itensor_dynamic_range(opr->output(0), opr);
}
};

auto replace_matmul_opr = [this](nvinfer1::INetworkDefinition* net,
OperatorNodeBase* opr) {
auto&& varnode2itensor =
m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
SmallVector<nvinfer1::DimensionType> dimtypes;
SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes;
bool transposeA = false, transposeB = false;
if (opr->same_type<opr::MatrixMul>()) {
dimtypes = {
nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL};
TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL),
TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL)};
transposeA = opr->cast_final_safe<opr::MatrixMul>().param().transposeA;
transposeB = opr->cast_final_safe<opr::MatrixMul>().param().transposeB;
} else {
mgb_assert(opr->same_type<opr::BatchedMatrixMul>());
dimtypes = {
nvinfer1::DimensionType::kINDEX,
nvinfer1::DimensionType::kSPATIAL,
nvinfer1::DimensionType::kSPATIAL};
TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kINDEX),
TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL),
TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL)};
transposeA = opr->cast_final_safe<opr::BatchedMatrixMul>()
.param()
.transposeA;
@@ -957,9 +964,11 @@ public:
auto&& varnode2itensor =
m_tensorrt_graphs[m_graph_map[opr] - 1]->varnode2itensor;
size_t tensor_ndim = opr->input(0)->shape().ndim;
SmallVector<nvinfer1::DimensionType> dimtypes(tensor_ndim);
SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes(
tensor_ndim);
for (size_t i = 0; i < tensor_ndim; i++) {
dimtypes[i] = nvinfer1::DimensionType::kSPATIAL;
dimtypes[i] = TENSORRT_NO_DIMENSIONTYPE_VALUE(
nvinfer1::DimensionType::kSPATIAL);
}
check_input(opr->input(0), opr, dimtypes);
auto host_one = HostTensorND(
@@ -1094,7 +1103,7 @@ VarNodeArray TensorRTReplacePass::Impl::find_parent_conv(OperatorNodeBase* inp_o

bool TensorRTReplacePass::Impl::check_input(
VarNode* var, OperatorNodeBase* opr,
SmallVector<nvinfer1::DimensionType> dimtypes) {
SmallVector<TENSORRT_NO_DIMENSIONTYPE(nvinfer1::DimensionType)> dimtypes) {
auto trt_graph = m_tensorrt_graphs[m_graph_map[opr] - 1];
auto&& varnode2itensor = trt_graph->varnode2itensor;
auto iter = trt_graph->inputs.find(var);


+ 9
- 5
src/tensorrt/impl/tensorrt_opr.cpp View File

@@ -21,12 +21,14 @@ public:
typedef std::pair<std::string, float> Record;
std::vector<Record> profile;

void reportLayerTime(const char* layerName, float ms) override;
void reportLayerTime(const char* layerName, float ms)
TENSORRT_NO_EXCEPT(noexcept) override;
void print_layer_times();
std::shared_ptr<json::Value> to_json();
};

void TensorRTProfiler::reportLayerTime(const char* layerName, float ms) {
void TensorRTProfiler::reportLayerTime(const char* layerName, float ms)
TENSORRT_NO_EXCEPT(noexcept) {
profile.push_back(std::make_pair(layerName, ms));
}

@@ -45,7 +47,8 @@ void TensorRTProfiler::print_layer_times() {

/* ========================== Logger ========================== */

void TensorRTOpr::Logger::log(nvinfer1::ILogger::Severity severity, const char* msg) {
void TensorRTOpr::Logger::log(nvinfer1::ILogger::Severity severity, const char* msg)
TENSORRT_NO_EXCEPT(noexcept) {
switch (severity) {
case Severity::kINTERNAL_ERROR:
mgb_log("TRT_INTERNAL_ERROR: %s", msg);
@@ -112,7 +115,8 @@ TensorRTOpr::GpuAllocator::~GpuAllocator() noexcept {
}

void* TensorRTOpr::GpuAllocator::allocate(
uint64_t size, uint64_t alignment, uint32_t flags) {
uint64_t size, uint64_t alignment, uint32_t flags)
TENSORRT_NO_EXCEPT(noexcept) {
static bool enable_log = getenv("MGB_LOG_TRT_MEM_ALLOC");
mgb_assert(
!flags && !(alignment & (alignment - 1)), "flags=%u alignment=%" PRIu64,
@@ -132,7 +136,7 @@ void* TensorRTOpr::GpuAllocator::allocate(
return ret;
}

void TensorRTOpr::GpuAllocator::free(void* memory) {
void TensorRTOpr::GpuAllocator::free(void* memory) TENSORRT_NO_EXCEPT(noexcept) {
{
auto iter = m_ptr2size.find(memory);
mgb_assert(iter != m_ptr2size.end(), "ptr %p not found", memory);


+ 18
- 3
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h View File

@@ -15,6 +15,19 @@
((NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + \
NV_TENSORRT_PATCH) // major, minor, patch

// some APIs have been changed in TensorRT 8
#if (NV_TENSOR_RT_VERSION >= 8001)
enum class Empty : int32_t {};
#define TENSORRT_NO_DIMENSIONTYPE(api) Empty
#define TENSORRT_NO_DIMENSIONTYPE_VALUE(api) \
{}
#define TENSORRT_NO_EXCEPT(api) api
#else
#define TENSORRT_NO_DIMENSIONTYPE(api) api
#define TENSORRT_NO_DIMENSIONTYPE_VALUE(api) api
#define TENSORRT_NO_EXCEPT(api)
#endif

namespace mgb {
namespace opr {

@@ -171,7 +184,8 @@ class TensorRTOpr::Logger final : public nvinfer1::ILogger, NonCopyableObj {
Logger();

public:
void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
void log(nvinfer1::ILogger::Severity severity, const char* msg)
TENSORRT_NO_EXCEPT(noexcept) override;
static Logger& instance();
};

@@ -184,8 +198,9 @@ public:
explicit GpuAllocator(CompNode cn);
~GpuAllocator() noexcept;

void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) override;
void free(void* memory) override;
void* allocate(uint64_t size, uint64_t alignment, uint32_t flags)
TENSORRT_NO_EXCEPT(noexcept) override;
void free(void* memory) TENSORRT_NO_EXCEPT(noexcept) override;

CompNode comp_node() const { return m_cn; }
};


Loading…
Cancel
Save