@@ -50,17 +50,6 @@ void TensorRTProfiler::print_layer_times() { | |||
printf("Total time: %4.3fms\n", total_time); | |||
} | |||
// Serializes the collected profiling records into a JSON array.
//
// Every entry of `profile` becomes a two-element JSON array of the form
// [String(rec.first), Number(rec.second)]; these are appended to the result
// in the iteration order of `profile`.
//
// NOTE(review): rec.first / rec.second are presumably the layer name and its
// elapsed time (print_layer_times() reports a total in ms) -- confirm against
// the declaration of `profile`.
//
// Returns the outer array (empty when `profile` has no entries).
std::shared_ptr<json::Value> TensorRTProfiler::to_json() { | |||
using namespace json; | |||
auto prof_arr = Array::make(); | |||
for (auto&& rec : profile) { | |||
// One [first, second] pair per profiling record.
auto&& item = Array::make(); | |||
item->add(String::make(rec.first)); | |||
item->add(Number::make(rec.second)); | |||
prof_arr->add(item); | |||
} | |||
return prof_arr; | |||
} | |||
#endif // MGB_ENABLE_JSON | |||
@@ -168,7 +157,7 @@ void TensorRTOpr::GpuAllocator::free(void* memory) { | |||
void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, | |||
CompNode comp_node_check, | |||
nvinfer1::ICudaEngine* engine, | |||
size_t batch) { | |||
size_t batch, bool use_trt_profiler) { | |||
auto comp_node = opr->comp_node(); | |||
// ICudaEngine is bound to the currently active device | |||
@@ -180,22 +169,11 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, | |||
comp_node_check.to_string().c_str(), | |||
comp_node.to_string().c_str()); | |||
} | |||
#if MGB_ENABLE_JSON | |||
auto pf_holder_pair = | |||
opr->owner_graph() | |||
->options() | |||
.user_data.get_user_data<opr_profile::OprProfileHolder>(); | |||
if (m_has_profiler && !pf_holder_pair.second) { | |||
m_context.reset(); | |||
m_has_profiler = false; | |||
} | |||
#endif | |||
auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr(); | |||
bool should_reinit_device_memory = | |||
!m_context || m_device_workspace_memory_ptr != workspace_ptr; | |||
if (!m_context) { | |||
m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}}; | |||
m_has_profiler = false; | |||
} | |||
m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1); | |||
bool is_trt_opr = false; | |||
@@ -235,11 +213,7 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, | |||
bool exec_success = false; | |||
#if MGB_ENABLE_JSON | |||
if (!pf_holder_pair.second) { | |||
mgb_assert(!m_has_profiler, | |||
"Invalid state of TensorRTRuntimeOpr: should not have " | |||
"profiler."); | |||
if (!use_trt_profiler) { | |||
#if NV_TENSOR_RT_VERSION >= 6001 | |||
if (is_trt_opr) | |||
exec_success = m_context->enqueueV2(m_trt_iobuf.data(), | |||
@@ -255,7 +229,6 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, | |||
} else { | |||
TensorRTProfiler trt_profiler; | |||
m_context->setProfiler(&trt_profiler); | |||
m_has_profiler = true; | |||
// TensorRT documentation stated that IExecutionContext->execute | |||
// "Synchronously execute inference on a batch", and it does not take a | |||
// cudaStream_t, we expect it do a device synchronize. But it seems like | |||
@@ -272,24 +245,9 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr, | |||
exec_success = m_context->execute(batch, m_trt_iobuf.data()); | |||
#endif | |||
mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname()); | |||
pf_holder_pair.first[0]->id2object_map[opr] = trt_profiler.to_json(); | |||
printf("TRT profile info of opr %s:\n", opr->name().c_str()); | |||
trt_profiler.print_layer_times(); | |||
} | |||
#else | |||
#if NV_TENSOR_RT_VERSION >= 6001 | |||
if (is_trt_opr) | |||
exec_success = m_context->enqueueV2(m_trt_iobuf.data(), | |||
env.cuda_env().stream, nullptr); | |||
else | |||
exec_success = m_context->enqueue(batch, m_trt_iobuf.data(), | |||
env.cuda_env().stream, nullptr); | |||
#else | |||
exec_success = m_context->enqueue(batch, m_trt_iobuf.data(), | |||
env.cuda_env().stream, nullptr); | |||
#endif | |||
mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname()); | |||
#endif | |||
} | |||
/* ========================== TensorRTOpr ========================== */ | |||
@@ -50,11 +50,11 @@ class TensorRTManager { | |||
std::vector<void*> m_trt_iobuf; | |||
TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context; | |||
void* m_device_workspace_memory_ptr; | |||
bool m_has_profiler; | |||
public: | |||
void exec(cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, | |||
nvinfer1::ICudaEngine* engine, size_t batch = 1); | |||
nvinfer1::ICudaEngine* engine, size_t batch = 1, | |||
bool use_trt_profiler = false); | |||
void clear_trt_context() { m_context.reset(); } | |||
@@ -28,50 +28,6 @@ using namespace mgb; | |||
using namespace nvinfer1; | |||
using namespace opr; | |||
// Checks that executing a TensorRTOpr while a GraphProfiler is attached
// produces a per-operator entry under "opr_internal_pf" in the profiler JSON,
// that every item in that entry carries a positive number at index 1, and
// that the TensorRT output matches the reference output net.y both with the
// profiler attached and after it has been destroyed.
TEST(TestOprTensorRT, Profile) { | |||
REQUIRE_GPU(1); | |||
intl::ConcatConvTensorRTNetwork net; | |||
auto p = net.create_trt_network(true); | |||
// Wrap the builder/network pair in a TensorRTOpr fed by net.x0 and net.x1;
// only its first output is used.
auto y2 = TensorRTOpr::make(TensorRTOpr::to_shared_ptr_builder(p.first), | |||
TensorRTOpr::to_shared_ptr_network(p.second), | |||
intl::TensorRTGraphFeatureBits::NCHW_FLOAT, {}, | |||
{net.x0, net.x1})[0]; | |||
HostTensorND host_z1; | |||
HostTensorND host_z2; | |||
// host_z1 receives the reference result (net.y), host_z2 the TensorRT one.
auto func = net.graph->compile({make_callback_copy(net.y, host_z1), | |||
make_callback_copy(y2, host_z2)}); | |||
{ | |||
// The profiler lives only for this scope, so the first execute() runs
// with it attached.
mgb::GraphProfiler profiler(net.graph.get()); | |||
func->execute(); | |||
// NOTE(review): profiler.to_json() is called twice (once for the file
// dump, once for the checks below).
profiler.to_json()->writeto_fpath( | |||
output_file("TestOprTensorRT.Profile.FromProfiler.json")); | |||
// Walk the JSON: root object -> "opr_internal_pf" object -> array stored
// under the id_str() of y2's owner operator.
// NOTE(review): the static_casts do not verify the JSON node types; a
// layout change would not surface as a clean test failure.
auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get()); | |||
auto record_obj = | |||
*static_cast<json::Object*>(prof_obj["opr_internal_pf"].get()); | |||
auto opr_prof_arr = *static_cast<json::Array*>( | |||
record_obj[y2.node()->owner_opr()->id_str()].get()); | |||
for (auto item_arr : opr_prof_arr.get_impl()) { | |||
// Each item is itself an array; index 1 holds a Number that must be
// strictly positive.
auto layer_info_arr = *static_cast<json::Array*>(item_arr.get()); | |||
auto layer_time = | |||
*static_cast<json::Number*>(layer_info_arr[1].get()); | |||
mgb_assert(layer_time.get_impl() > 0, "Error occured in json."); | |||
} | |||
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); | |||
} | |||
// Run it again after the profiler no longer exists. | |||
func->execute(); | |||
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); | |||
} | |||
TEST(TestOprTensorRT, Basic) { | |||
REQUIRE_GPU(1); | |||
intl::SimpleTensorRTNetwork net; | |||
@@ -10,7 +10,6 @@ | |||
*/ | |||
#include "megbrain/comp_node_env.h" | |||
#include "megbrain/plugin/profiler.h" | |||
#include "megbrain/test/autocheck.h" | |||
#include "megbrain/test/helper.h" | |||
#include "megbrain/test/megdnn_helper.h" | |||
@@ -102,69 +101,6 @@ TEST(TestOprTensorRT, ConcatRuntimeBasic) { | |||
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); | |||
} | |||
// Same profiling check as for TensorRTOpr, but for TensorRTRuntimeOpr: a CUDA
// engine is built from the test network, serialized to a file, wrapped in a
// TensorRTRuntimeOpr and executed first with a GraphProfiler attached and
// then again without one. In both runs the result must match net.y.
TEST(TestOprTensorRT, RuntimeProfile) { | |||
REQUIRE_GPU(1); | |||
intl::ConcatConvTensorRTNetwork net; | |||
SymbolVar y2; | |||
{ | |||
auto p = net.create_trt_network(false); | |||
// Take ownership of the raw network/builder pointers for this scope.
TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}}; | |||
TensorRTUniquePtr<IBuilder> builder{p.first, {}}; | |||
builder->setMaxBatchSize(5); | |||
// TensorRT >= 6.0.1 builds the engine through an IBuilderConfig; older
// versions use buildCudaEngine().
#if NV_TENSOR_RT_VERSION >= 6001 | |||
TensorRTUniquePtr<IBuilderConfig> build_config{ | |||
builder->createBuilderConfig()}; | |||
auto cuda_engine = | |||
builder->buildEngineWithConfig(*trt_net, *build_config); | |||
#else | |||
auto cuda_engine = builder->buildCudaEngine(*trt_net); | |||
#endif | |||
// Dump the serialized engine to the "trt_cuda_engine" output file and
// verify that every byte was written.
// NOTE(review): the fopen() result is not checked before fwrite()/fclose();
// a failed open would pass a null FILE* to them.
TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}}; | |||
FILE* fout = fopen(output_file("trt_cuda_engine").c_str(), "wb"); | |||
auto wr = fwrite(mem->data(), 1, mem->size(), fout); | |||
mgb_assert(wr == mem->size()); | |||
fclose(fout); | |||
// Ownership of the engine is handed to the runtime operator via
// to_shared_ptr_engine(); only the first output is used.
y2 = TensorRTRuntimeOpr::make( | |||
TensorRTRuntimeOpr::to_shared_ptr_engine(cuda_engine), {}, | |||
{net.x0, net.x1})[0]; | |||
} | |||
HostTensorND host_z1; | |||
HostTensorND host_z2; | |||
// host_z1 receives the reference result (net.y), host_z2 the TensorRT one.
auto func = net.graph->compile({make_callback_copy(net.y, host_z1), | |||
make_callback_copy(y2, host_z2)}); | |||
{ | |||
// The profiler lives only for this scope, so the first execute() runs
// with it attached.
mgb::GraphProfiler profiler(net.graph.get()); | |||
func->execute(); | |||
// NOTE(review): profiler.to_json() is called twice (once for the file
// dump, once for the checks below).
profiler.to_json()->writeto_fpath(output_file( | |||
"TestOprTensorRT.RuntimeProfile.FromProfiler.json")); | |||
// Walk the JSON: root object -> "opr_internal_pf" object -> array stored
// under the id_str() of y2's owner operator.
// NOTE(review): the static_casts do not verify the JSON node types; a
// layout change would not surface as a clean test failure.
auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get()); | |||
auto record_obj = | |||
*static_cast<json::Object*>(prof_obj["opr_internal_pf"].get()); | |||
auto opr_prof_arr = *static_cast<json::Array*>( | |||
record_obj[y2.node()->owner_opr()->id_str()].get()); | |||
for (auto item_arr : opr_prof_arr.get_impl()) { | |||
// Each item is itself an array; index 1 holds a Number that must be
// strictly positive.
auto layer_info_arr = *static_cast<json::Array*>(item_arr.get()); | |||
auto layer_time = | |||
*static_cast<json::Number*>(layer_info_arr[1].get()); | |||
mgb_assert(layer_time.get_impl() > 0, "Error occured in json."); | |||
} | |||
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); | |||
} | |||
// Run it again after the profiler no longer exists. | |||
func->execute(); | |||
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4); | |||
} | |||
TEST(TestOprTensorRT, RuntimeChangeBatchSize) { | |||
REQUIRE_GPU(1); | |||
intl::SimpleTensorRTNetwork net; | |||