diff --git a/src/tensorrt/impl/tensorrt_opr.cpp b/src/tensorrt/impl/tensorrt_opr.cpp
index f7647a0c..bebb0fce 100644
--- a/src/tensorrt/impl/tensorrt_opr.cpp
+++ b/src/tensorrt/impl/tensorrt_opr.cpp
@@ -227,8 +227,10 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
 #endif
         mgb_assert(exec_success, "TensorRTOpr failed in execution.");
     } else {
+#if MGB_ENABLE_JSON
         TensorRTProfiler trt_profiler;
         m_context->setProfiler(&trt_profiler);
+#endif // MGB_ENABLE_JSON
         // TensorRT documentation stated that IExecutionContext->execute
         // "Synchronously execute inference on a batch", and it does not take a
         // cudaStream_t, we expect it do a device synchronize. But it seems like
@@ -245,8 +247,10 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
         exec_success = m_context->execute(batch, m_trt_iobuf.data());
 #endif
         mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
+#if MGB_ENABLE_JSON
         printf("TRT profile info of opr %s:\n", opr->name().c_str());
         trt_profiler.print_layer_times();
+#endif // MGB_ENABLE_JSON
     }
 }
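
For context: the TensorRTProfiler being guarded here is a hook built on TensorRT's nvinfer1::IProfiler interface. Once registered via IExecutionContext::setProfiler, TensorRT calls reportLayerTime once per layer on each synchronous execute, which is what makes print_layer_times possible afterward. Below is a minimal sketch of such a profiler; SketchProfiler and its members are illustrative names, not MegEngine's actual TensorRTProfiler, and the noexcept on reportLayerTime assumes a TensorRT 8 style header (older releases declare it without noexcept).

// Minimal nvinfer1::IProfiler sketch (illustrative; not the real TensorRTProfiler).
#include <NvInfer.h>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

class SketchProfiler final : public nvinfer1::IProfiler {
public:
    // TensorRT invokes this once per layer after each synchronous execute().
    // Note: pre-TensorRT-8 headers declare reportLayerTime without noexcept.
    void reportLayerTime(const char* layer_name, float ms) noexcept override {
        m_records.emplace_back(layer_name, ms);
    }

    // Dump the accumulated per-layer timings, analogous to print_layer_times().
    void print_layer_times() const {
        float total_ms = 0.f;
        for (auto&& rec : m_records) {
            std::printf("%-60s %8.3f ms\n", rec.first.c_str(), rec.second);
            total_ms += rec.second;
        }
        std::printf("%-60s %8.3f ms\n", "total", total_ms);
    }

private:
    std::vector<std::pair<std::string, float>> m_records;
};

Registration mirrors the guarded lines in the patch: construct the profiler, call m_context->setProfiler(&trt_profiler) before execute(), then print after execution completes. The #if MGB_ENABLE_JSON guard presumably exists because TensorRTProfiler itself is only compiled when JSON support is enabled, so the previously unguarded references would not build with MGB_ENABLE_JSON disabled.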