Browse Source

fix(mgb): fix TensorRT runtime opr profiling

GitOrigin-RevId: 3545aa53b2
tags/v1.3.1
Megvii Engine Team 4 years ago
parent
commit
bcbfbbd743
4 changed files with 4 additions and 154 deletions
  1. +2
    -44
      src/tensorrt/impl/tensorrt_opr.cpp
  2. +2
    -2
      src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h
  3. +0
    -44
      src/tensorrt/test/tensorrt.cpp
  4. +0
    -64
      src/tensorrt/test/tensorrt_runtime.cpp

+ 2
- 44
src/tensorrt/impl/tensorrt_opr.cpp View File

@@ -50,17 +50,6 @@ void TensorRTProfiler::print_layer_times() {
printf("Total time: %4.3fms\n", total_time);
}

// Serialize the recorded per-layer profiling data to a JSON array.
// Each record is emitted as a two-element array: [layer name, layer time].
// NOTE(review): `profile` is assumed to hold (name, time) pairs collected by
// the TensorRT IProfiler callback; the time unit looks like milliseconds
// (print_layer_times() formats with "ms") -- confirm against the recorder.
std::shared_ptr<json::Value> TensorRTProfiler::to_json() {
using namespace json;
auto prof_arr = Array::make();
for (auto&& rec : profile) {
auto&& item = Array::make();
item->add(String::make(rec.first));
item->add(Number::make(rec.second));
prof_arr->add(item);
}
return prof_arr;
}
#endif // MGB_ENABLE_JSON


@@ -168,7 +157,7 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
CompNode comp_node_check,
nvinfer1::ICudaEngine* engine,
size_t batch) {
size_t batch, bool use_trt_profiler) {

auto comp_node = opr->comp_node();
// ICudaEngine is bound to the currently active device
@@ -180,22 +169,11 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
comp_node_check.to_string().c_str(),
comp_node.to_string().c_str());
}
#if MGB_ENABLE_JSON
auto pf_holder_pair =
opr->owner_graph()
->options()
.user_data.get_user_data<opr_profile::OprProfileHolder>();
if (m_has_profiler && !pf_holder_pair.second) {
m_context.reset();
m_has_profiler = false;
}
#endif
auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr();
bool should_reinit_device_memory =
!m_context || m_device_workspace_memory_ptr != workspace_ptr;
if (!m_context) {
m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
m_has_profiler = false;
}
m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1);
bool is_trt_opr = false;
@@ -235,11 +213,7 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,

bool exec_success = false;

#if MGB_ENABLE_JSON
if (!pf_holder_pair.second) {
mgb_assert(!m_has_profiler,
"Invalid state of TensorRTRuntimeOpr: should not have "
"profiler.");
if (!use_trt_profiler) {
#if NV_TENSOR_RT_VERSION >= 6001
if (is_trt_opr)
exec_success = m_context->enqueueV2(m_trt_iobuf.data(),
@@ -255,7 +229,6 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
} else {
TensorRTProfiler trt_profiler;
m_context->setProfiler(&trt_profiler);
m_has_profiler = true;
// TensorRT documentation stated that IExecutionContext->execute
// "Synchronously execute inference on a batch", and it does not take a
// cudaStream_t, we expect it do a device synchronize. But it seems like
@@ -272,24 +245,9 @@ void TensorRTManager::exec(cg::SingleCNOperatorNodeBase* opr,
exec_success = m_context->execute(batch, m_trt_iobuf.data());
#endif
mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
pf_holder_pair.first[0]->id2object_map[opr] = trt_profiler.to_json();
printf("TRT profile info of opr %s:\n", opr->name().c_str());
trt_profiler.print_layer_times();
}
#else
#if NV_TENSOR_RT_VERSION >= 6001
if (is_trt_opr)
exec_success = m_context->enqueueV2(m_trt_iobuf.data(),
env.cuda_env().stream, nullptr);
else
exec_success = m_context->enqueue(batch, m_trt_iobuf.data(),
env.cuda_env().stream, nullptr);
#else
exec_success = m_context->enqueue(batch, m_trt_iobuf.data(),
env.cuda_env().stream, nullptr);
#endif
mgb_assert(exec_success, "trt execution failed: opr=%s", opr->cname());
#endif
}

/* ========================== TensorRTOpr ========================== */


+ 2
- 2
src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h View File

@@ -50,11 +50,11 @@ class TensorRTManager {
std::vector<void*> m_trt_iobuf;
TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context;
void* m_device_workspace_memory_ptr;
bool m_has_profiler;

public:
void exec(cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
nvinfer1::ICudaEngine* engine, size_t batch = 1);
nvinfer1::ICudaEngine* engine, size_t batch = 1,
bool use_trt_profiler = false);

void clear_trt_context() { m_context.reset(); }



+ 0
- 44
src/tensorrt/test/tensorrt.cpp View File

@@ -28,50 +28,6 @@ using namespace mgb;
using namespace nvinfer1;
using namespace opr;

// Verifies that per-layer TensorRT profiling records for a TensorRTOpr are
// exported into GraphProfiler's JSON output ("opr_internal_pf" section), and
// that the graph still executes correctly after the profiler is destroyed.
TEST(TestOprTensorRT, Profile) {
REQUIRE_GPU(1);
intl::ConcatConvTensorRTNetwork net;

auto p = net.create_trt_network(true);

// Wrap the builder/network pair in a TensorRTOpr producing the same
// result as the reference graph output net.y.
auto y2 = TensorRTOpr::make(TensorRTOpr::to_shared_ptr_builder(p.first),
TensorRTOpr::to_shared_ptr_network(p.second),
intl::TensorRTGraphFeatureBits::NCHW_FLOAT, {},
{net.x0, net.x1})[0];

HostTensorND host_z1;
HostTensorND host_z2;
auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
make_callback_copy(y2, host_z2)});
{
// Profiler scoped to this block: profiling must be active only while
// it exists.
mgb::GraphProfiler profiler(net.graph.get());

func->execute();

profiler.to_json()->writeto_fpath(
output_file("TestOprTensorRT.Profile.FromProfiler.json"));
auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get());

// Drill into the profiler JSON: "opr_internal_pf" is expected to map
// opr id -> array of [layer name, layer time] entries.
auto record_obj =
*static_cast<json::Object*>(prof_obj["opr_internal_pf"].get());
auto opr_prof_arr = *static_cast<json::Array*>(
record_obj[y2.node()->owner_opr()->id_str()].get());
for (auto item_arr : opr_prof_arr.get_impl()) {
auto layer_info_arr = *static_cast<json::Array*>(item_arr.get());
auto layer_time =
*static_cast<json::Number*>(layer_info_arr[1].get());

// Every profiled layer must report a positive time.
mgb_assert(layer_time.get_impl() > 0, "Error occured in json.");
}

MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}
// Run it again after profiler is not in existence.
func->execute();

MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}

TEST(TestOprTensorRT, Basic) {
REQUIRE_GPU(1);
intl::SimpleTensorRTNetwork net;


+ 0
- 64
src/tensorrt/test/tensorrt_runtime.cpp View File

@@ -10,7 +10,6 @@
*/

#include "megbrain/comp_node_env.h"
#include "megbrain/plugin/profiler.h"
#include "megbrain/test/autocheck.h"
#include "megbrain/test/helper.h"
#include "megbrain/test/megdnn_helper.h"
@@ -102,69 +101,6 @@ TEST(TestOprTensorRT, ConcatRuntimeBasic) {
MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}

// Same check as TestOprTensorRT.Profile, but for TensorRTRuntimeOpr: build a
// TensorRT engine, serialize it to disk, wrap it in a runtime opr, and verify
// per-layer profiling records appear in GraphProfiler's JSON output while a
// profiler exists -- and that execution still succeeds after it is gone.
TEST(TestOprTensorRT, RuntimeProfile) {
REQUIRE_GPU(1);
intl::ConcatConvTensorRTNetwork net;
SymbolVar y2;
{
auto p = net.create_trt_network(false);
TensorRTUniquePtr<INetworkDefinition> trt_net{p.second, {}};
TensorRTUniquePtr<IBuilder> builder{p.first, {}};
builder->setMaxBatchSize(5);
// TensorRT >= 6 replaced buildCudaEngine with the IBuilderConfig path.
#if NV_TENSOR_RT_VERSION >= 6001
TensorRTUniquePtr<IBuilderConfig> build_config{
builder->createBuilderConfig()};
auto cuda_engine =
builder->buildEngineWithConfig(*trt_net, *build_config);
#else
auto cuda_engine = builder->buildCudaEngine(*trt_net);
#endif
// Persist the serialized engine; checks the full write path too.
TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};

FILE* fout = fopen(output_file("trt_cuda_engine").c_str(), "wb");
auto wr = fwrite(mem->data(), 1, mem->size(), fout);
mgb_assert(wr == mem->size());
fclose(fout);

y2 = TensorRTRuntimeOpr::make(
TensorRTRuntimeOpr::to_shared_ptr_engine(cuda_engine), {},
{net.x0, net.x1})[0];
}

HostTensorND host_z1;
HostTensorND host_z2;
auto func = net.graph->compile({make_callback_copy(net.y, host_z1),
make_callback_copy(y2, host_z2)});

{
// Profiler scoped to this block: profiling must be active only while
// it exists.
mgb::GraphProfiler profiler(net.graph.get());

func->execute();

profiler.to_json()->writeto_fpath(output_file(
"TestOprTensorRT.RuntimeProfile.FromProfiler.json"));

auto prof_obj = *static_cast<json::Object*>(profiler.to_json().get());
// Drill into the profiler JSON: "opr_internal_pf" is expected to map
// opr id -> array of [layer name, layer time] entries.
auto record_obj =
*static_cast<json::Object*>(prof_obj["opr_internal_pf"].get());
auto opr_prof_arr = *static_cast<json::Array*>(
record_obj[y2.node()->owner_opr()->id_str()].get());
for (auto item_arr : opr_prof_arr.get_impl()) {
auto layer_info_arr = *static_cast<json::Array*>(item_arr.get());
auto layer_time =
*static_cast<json::Number*>(layer_info_arr[1].get());

// Every profiled layer must report a positive time.
mgb_assert(layer_time.get_impl() > 0, "Error occured in json.");
}

MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}
// Run it again after profiler is not in existence.
func->execute();

MGB_ASSERT_TENSOR_NEAR(host_z1, host_z2, 1e-4);
}

TEST(TestOprTensorRT, RuntimeChangeBatchSize) {
REQUIRE_GPU(1);
intl::SimpleTensorRTNetwork net;


Loading…
Cancel
Save