GitOrigin-RevId: 7461de704e
@@ -153,6 +153,65 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
 }
 
 /* ========================== TensorRTManager ========================== */
+const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager::
+        create_trt_context(
+                const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine) {
+    if (!m_context) {
+        m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
+#if NV_TENSOR_RT_VERSION >= 6001
+        for (size_t i = 0; i < inp_shape.size(); ++i) {
+            auto dims = m_context->getBindingDimensions(i);
+            for (int j = 0; j < dims.nbDims; j++) {
+                if (dims.d[j] == -1) {
+                    dims.d[j] = inp_shape.at(i)[j];
+                }
+            }
+            m_context->setBindingDimensions(i, dims);
+        }
+        // check if input shape is set correctly
+        for (int i = inp_shape.size(); i < engine->getNbBindings(); ++i) {
+            auto dims = m_context->getBindingDimensions(i);
+            if (dims.nbDims == -1) {
+                for (int j = 0; j < engine->getNbOptimizationProfiles(); j++) {
+                    mgb_log_debug("TensorRT profile %d:\n", j);
+                    for (size_t k = 0; k < inp_shape.size(); k++) {
+                        mgb_log_debug(
+                                "input[%zu]'s minimum shape is: %s\n", k,
+                                TensorRTOpr::dims2shape(
+                                        engine->getProfileDimensions(
+                                                j, k,
+                                                nvinfer1::OptProfileSelector::kMIN))
+                                        .to_string()
+                                        .c_str());
+                        mgb_log_debug(
+                                "input[%zu]'s optimum shape is: %s\n", k,
+                                TensorRTOpr::dims2shape(
+                                        engine->getProfileDimensions(
+                                                j, k,
+                                                nvinfer1::OptProfileSelector::kOPT))
+                                        .to_string()
+                                        .c_str());
+                        mgb_log_debug(
+                                "input[%zu]'s maximum shape is: %s\n", k,
+                                TensorRTOpr::dims2shape(
+                                        engine->getProfileDimensions(
+                                                j, k,
+                                                nvinfer1::OptProfileSelector::kMAX))
+                                        .to_string()
+                                        .c_str());
+                    }
+                }
+                mgb_throw(
+                        MegBrainError,
+                        "Invalid network output, this might be caused by inconsistent "
+                        "input shapes. Correct input optimization profiles as above.");
+            }
+        }
+#endif
+    }
+    return m_context;
+}
+
 void TensorRTManager::exec(
         cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
         nvinfer1::ICudaEngine* engine, size_t batch, bool use_trt_profiler) {
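
For context, the new create_trt_context() above follows the usual TensorRT (>= 6) dynamic-shape idiom: every -1 input dimension is replaced with the concrete runtime extent and committed via setBindingDimensions(), after which the remaining bindings become resolvable. The snippet below is a minimal, self-contained sketch of that idiom, not code from this patch; resolve_dynamic_bindings and concrete_input_shapes are illustrative names.

// Illustrative sketch only (not part of the patch): the TensorRT >= 6 dynamic-shape
// pattern that create_trt_context() relies on.
#include <NvInfer.h>
#include <vector>

// Fill every -1 (dynamic) input dimension from concrete runtime shapes, then check
// that TensorRT can infer all remaining binding dimensions.
bool resolve_dynamic_bindings(
        nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context,
        const std::vector<std::vector<int>>& concrete_input_shapes) {
    int nb_inputs = static_cast<int>(concrete_input_shapes.size());
    for (int i = 0; i < nb_inputs; ++i) {
        nvinfer1::Dims dims = engine->getBindingDimensions(i);
        for (int j = 0; j < dims.nbDims; ++j) {
            if (dims.d[j] == -1) {  // dynamic axis: take it from the runtime shape
                dims.d[j] = concrete_input_shapes[i][j];
            }
        }
        if (!context->setBindingDimensions(i, dims)) {
            return false;  // shape falls outside the engine's optimization profile
        }
    }
    // Output bindings report nbDims == -1 until all inputs are fully specified.
    return context->allInputDimensionsSpecified();
}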
@@ -169,9 +228,11 @@ void TensorRTManager::exec(
     auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr();
     bool should_reinit_device_memory =
             !m_context || m_device_workspace_memory_ptr != workspace_ptr;
-    if (!m_context) {
-        m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
+    TensorShapeArray arr;
+    for (auto&& i : opr->input()) {
+        arr.push_back(i->shape());
     }
+    create_trt_context(arr, engine);
     m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1);
     bool is_trt_opr = false;
     if (opr->same_type<TensorRTOpr>()) {
@@ -101,7 +101,8 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
 void TensorRTRuntimeOpr::get_output_var_shape(
         const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
     auto batch = inp_shape.at(0)[0];
-    auto get_mgb_shape = [this, batch](int binding_idx) -> TensorShape {
+    auto&& context = m_manager.create_trt_context(inp_shape, m_engine.get());
+    auto get_mgb_shape = [&](int binding_idx) -> TensorShape {
         auto dims = m_engine->getBindingDimensions(binding_idx);
 #if NV_TENSOR_RT_VERSION >= 6001
         auto format = m_engine->getBindingFormat(binding_idx);
@@ -121,8 +122,25 @@ void TensorRTRuntimeOpr::get_output_var_shape(
             dims.d[dims.nbDims - 1] = 4;
         }
 #endif
-        return m_trt_engine_has_batch ? TensorRTOpr::dims2shape(dims)
-                                      : TensorRTOpr::dims2shape(dims, batch);
+        auto shape = m_trt_engine_has_batch ? TensorRTOpr::dims2shape(dims)
+                                            : TensorRTOpr::dims2shape(dims, batch);
+#if NV_TENSOR_RT_VERSION >= 6001
+        if (static_cast<size_t>(binding_idx) < inp_shape.size()) {
+            for (int i = 0; i < dims.nbDims; i++) {
+                if (dims.d[i] == -1) {
+                    shape[i] = inp_shape.at(binding_idx)[i];
+                }
+            }
+        } else {
+            auto trt_infer_dims = context->getBindingDimensions(binding_idx);
+            for (int i = 0; i < dims.nbDims; i++) {
+                if (dims.d[i] == -1) {
+                    shape[i] = trt_infer_dims.d[i];
+                }
+            }
+        }
+#endif
+        return shape;
     };
     for (size_t i = 0; i < inp_shape.size(); ++i) {
         mgb_assert(batch == inp_shape[i][0], "input batchsize not equal");
@@ -135,6 +153,8 @@ void TensorRTRuntimeOpr::get_output_var_shape(
         out_shape[i] = get_mgb_shape(i + input().size());
     }
     out_shape.back() = {intl::workspace_size(m_engine.get())};
+    // must clear context, otherwise it may cause unknown errors.
+    m_manager.clear_trt_context();
 }
 
 void TensorRTRuntimeOpr::add_input_layout_constraint() {
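
The reason get_output_var_shape() now needs a live execution context: for an engine built with dynamic shapes, engine->getBindingDimensions() keeps -1 on dynamic axes, and only IExecutionContext::getBindingDimensions() returns the inferred output extents once the input bindings have been fixed. The sketch below shows just that query; it is separate from the patch, and infer_output_dims / nb_inputs are made-up names. It assumes the input binding dimensions have already been set on the context, as create_trt_context() does in this patch.

// Illustrative sketch only (not part of the patch): querying inferred output
// dimensions from a dynamic-shape engine.
#include <NvInfer.h>
#include <vector>

std::vector<nvinfer1::Dims> infer_output_dims(
        nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context,
        int nb_inputs) {
    std::vector<nvinfer1::Dims> out;
    for (int idx = nb_inputs; idx < engine->getNbBindings(); ++idx) {
        // engine->getBindingDimensions(idx) would still contain -1 on dynamic axes;
        // only the execution context knows the concrete, inferred output extents.
        out.push_back(context->getBindingDimensions(idx));
    }
    return out;
}

The clear_trt_context() call after shape inference presumably drops this temporary, shape-inference-only context so that exec() later rebuilds one tied to the operator's actual workspace, which is what the "unknown errors" comment in the patch guards against.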
@@ -52,6 +52,8 @@ class TensorRTManager {
     void* m_device_workspace_memory_ptr;
 
 public:
+    const TensorRTUniquePtr<nvinfer1::IExecutionContext>& create_trt_context(
+            const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine);
     void exec(
             cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
             nvinfer1::ICudaEngine* engine, size_t batch = 1,
@@ -86,7 +86,7 @@ private:
     // note: gpu allocator must be released after other trt objects
     std::shared_ptr<TensorRTOpr::GpuAllocator> m_gpu_allocator;
     std::shared_ptr<nvinfer1::ICudaEngine> m_engine;
-    intl::TensorRTManager m_manager;
+    mutable intl::TensorRTManager m_manager;
     // if m_engine's dims with batch
     bool m_trt_engine_has_batch;
 };  // namespace mgb