From 9d47c3babdec5077548e161be790db23a7e60508 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 26 Apr 2021 13:26:01 +0800 Subject: [PATCH] feat(profiler): imperative profiler support tracing GitOrigin-RevId: b247472feba6d28416f52874c3517e50a8c2bd49 --- imperative/python/megengine/jit/tracing.py | 17 +- imperative/python/src/graph_rt.cpp | 6 +- imperative/src/impl/profiler_plugin.cpp | 207 +++++++++++++++++++++ .../include/megbrain/imperative/profiler_plugin.h | 46 +++++ 4 files changed, 272 insertions(+), 4 deletions(-) create mode 100644 imperative/src/impl/profiler_plugin.cpp create mode 100644 imperative/src/include/megbrain/imperative/profiler_plugin.h diff --git a/imperative/python/megengine/jit/tracing.py b/imperative/python/megengine/jit/tracing.py index 31999f1c..8f617364 100644 --- a/imperative/python/megengine/jit/tracing.py +++ b/imperative/python/megengine/jit/tracing.py @@ -17,7 +17,7 @@ from typing import Any import numpy as np -from ..core._imperative_rt import GraphProfiler, SerializationMetadata +from ..core._imperative_rt import GraphProfiler, GraphProfiler2, SerializationMetadata from ..core._imperative_rt.core2 import Tensor as RawTensor from ..core._imperative_rt.core2 import ( TensorWeakRef, @@ -39,6 +39,7 @@ from ..core.ops.special import Const from ..core.tensor import megbrain_graph as G from ..core.tensor.utils import setscalar from ..utils.naming import AutoNaming +from ..utils.profiler import is_profiling from .dtr_config import DTRConfig from .graph_opt_config import GraphOptimizationConfig from .sublinear_memory_config import SublinearMemoryConfig @@ -160,6 +161,7 @@ class trace: self._dtr_config = dtr_config self._profiling = profiling self._profiler = None + self._profiler2 = None self._graph_opt_level = opt_level self._graph_opt_config = graph_opt_config self._symbolic_shape = symbolic_shape @@ -382,7 +384,8 @@ class trace: lazy_eval_graph.options.graph_opt_level = self._graph_opt_level 
lazy_eval_graph._set_priority_to_id([*lazy_eval_links, *readers]) lazy_eval_graph.compile(*lazy_eval_links, *readers) - lazy_eval_graph() + self._execute_graph(lazy_eval_graph) + lazy_eval_graph.wait() for r, x in zip(readers, lazy_eval_tensors): # get values from lazy_eval_graph and assign to lazy_eval tensor x._handle = RawTensor(r.op.get_value())._handle @@ -401,7 +404,7 @@ class trace: else: if self._graph is None: self._compile() - self._graph.execute() + self._execute_graph(self._graph) def do_finalize(): escaped_tensors = self._take_escaped_tensors() @@ -532,9 +535,17 @@ class trace: # profile if self._profiling: self._profiler = GraphProfiler(graph) + self._profiler2 = None if int(os.getenv("MEGENGINE_INPLACE_UPDATE", "0")): graph.options.var_sanity_check_first_run = False + def _execute_graph(self, graph: G.Graph, *args): + if is_profiling() and (self._profiler2 is None): + self._profiler2 = GraphProfiler2(graph) + elif not is_profiling() and (self._profiler2 is not None): + self._profiler2 = None + graph.execute(*args) + def _compile(self): graph = self._graph = G.Graph() graph.options.async_exec_level = 0b100 diff --git a/imperative/python/src/graph_rt.cpp b/imperative/python/src/graph_rt.cpp index 837bc91f..b6fb0c20 100644 --- a/imperative/python/src/graph_rt.cpp +++ b/imperative/python/src/graph_rt.cpp @@ -23,7 +23,7 @@ #include "./common.h" #include "./ops.h" #include "megbrain/gopt/inference.h" - +#include "megbrain/imperative/profiler_plugin.h" namespace py = pybind11; @@ -239,6 +239,10 @@ void init_graph_rt(py::module m) { })) .def("get", [](_CompGraphProfilerImpl& profiler) { return profiler._get_result(); }); + using interpreter::intl::ProfilerPlugin; + py::class_>(m, "GraphProfiler2") + .def(py::init()); + auto GraphOptimizeOptions = py::class_<_OptimizeForInferenceOptions>(m, "GraphOptimizeOptions") .def(py::init()) .def("serialize", &_OptimizeForInferenceOptions::serialize) diff --git a/imperative/src/impl/profiler_plugin.cpp 
b/imperative/src/impl/profiler_plugin.cpp new file mode 100644 index 00000000..d4a462f7 --- /dev/null +++ b/imperative/src/impl/profiler_plugin.cpp @@ -0,0 +1,207 @@ +/** + * \file imperative/src/impl/profiler_plugin.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "megbrain/imperative/profiler_plugin.h" + +#include "megbrain/graph.h" +#include "megbrain/graph/event.h" + +#include "./profiler/events.h" + +namespace mgb::imperative::interpreter::intl { + +ProfilerPlugin::ProfilerPlugin(cg::ComputingGraph* graph): PluginBase(graph) { + using namespace cg; + using namespace cg::event; + using namespace profiler; + auto on_seq_start = [this](CompSeqExecBeforeStart const& event) { + // reset + mgb_assert(!event.graph->options().imperative_proxy_graph); + if (m_opr_dict.empty() && m_var_dict.empty()) { + init_seq(event.exec); + } + Profiler::record("DispatchOprs"); + event.exec->iter_opr_seq([this](OperatorNodeBase* opr) -> bool{ + auto& opr_info = get_opr_info(opr); + SmallVector inputs; + for (auto input: opr->input()) { + inputs.push_back(get_var_info(input).id); + } + SmallVector outputs; + for (auto output: opr->output()) { + outputs.push_back(get_var_info(output).id); + } + auto opr_name = opr->dyn_typeinfo()->name; + auto copy_params = [params = opr_info.params] { return *params; }; + Profiler::record(opr_info.id, opr_name, copy_params, inputs, outputs); + for (auto output: opr->output()) { + auto var_id = get_var_info(output).id; + Profiler::record(var_id); + } + return true; + }); + Profiler::record("DispatchOprs"); + Profiler::record("Constants"); + for (auto&& [var, var_info]: m_var_dict) { + if (var_info->is_const) { + bool 
valid = var->dev_tensor_valid(); + auto layout = valid ? var->layout() : TensorLayout(); + Profiler::record(var_info->id); + Profiler::record(var_info->id, layout, var->comp_node(), valid ? var->dev_tensor().raw_ptr() : nullptr); + } else { + var_info->rt_ref_cnt = var_info->ref_cnt; + } + } + Profiler::record("Constants"); + }; + auto on_opr_start = [this](OprExecStart const& event) { + OperatorNodeBase* opr = event.opr; + auto& opr_info = get_opr_info(opr); + auto comp_node = opr_info.comp_node; + auto runner = [&opr_info] { + Profiler::record(opr_info.id); + }; + event.env->dispatch_on_comp_node(comp_node, runner); + auto inputs = opr->input(); + for (auto&& input: inputs) { + auto& var_info = get_var_info(input); + auto runner = [&var_info, input] { + auto inp_id = var_info.id; + Profiler::record(inp_id, input->shape()); + Profiler::record(inp_id); + Profiler::record(inp_id, input->shape()); + }; + event.env->dispatch_on_comp_node(comp_node, runner); + } + }; + auto on_opr_finish = [this](OprExecKernelEnd const& event) { + OperatorNodeBase* opr = event.opr; + auto& opr_info = get_opr_info(opr); + auto comp_node = opr_info.comp_node; + auto inputs = opr->input(); + auto outputs = opr->output(); + for (auto input: inputs) { + auto& var_info = get_var_info(input); + auto runner = [&var_info] { + if (!var_info.is_const) { + if (--var_info.rt_ref_cnt == 0) { + Profiler::record(var_info.id); + } + } + }; + event.env->dispatch_on_comp_node(comp_node, runner); + } + for (auto output: outputs) { + auto& var_info = get_var_info(output); + mgb_assert(comp_node == output->comp_node(), "opr comp_node mismatch"); + auto runner = [&var_info, output] { + auto out_id = var_info.id; + bool valid = output->dev_tensor_valid(); + auto layout = valid ? output->layout() : TensorLayout(); + Profiler::record(out_id, output->shape()); + Profiler::record(out_id, layout, output->comp_node(), valid ? 
output->dev_tensor().raw_ptr() : nullptr); + if (!var_info.ref_cnt) { + Profiler::record(var_info.id); + } + Profiler::record(out_id, output->shape()); + }; + event.env->dispatch_on_comp_node(comp_node, runner); + } + auto runner = [&opr_info]() { + Profiler::record(opr_info.id); + }; + event.env->dispatch_on_comp_node(comp_node, runner); + }; + auto on_before_kern = [this](BeforeKernel const& event) { + OperatorNodeBase* opr = event.opr; + Profiler::record(get_opr_info(opr).id, get_opr_info(opr).id, Timer::record_event(event.comp_node)); + }; + auto on_after_kern = [this](AfterKernel const& event) { + OperatorNodeBase* opr = event.opr; + Profiler::record(get_opr_info(opr).id, get_opr_info(opr).id, Timer::record_event(event.comp_node)); + }; + auto on_graph_compile = [this](const CompSeqOrderDetermined&) { + m_opr_dict.clear(); + m_var_dict.clear(); + }; + auto on_seq_finish = [this](CompSeqExecFinished const& event) { + for (auto&& [var, var_info]: m_var_dict) { + MGB_MARK_USED_VAR(var); + if (var_info->is_const) { + Profiler::record(var_info->id); + } + Profiler::record(var_info->id, var_info->ref_cnt); + } + }; + add_event_handler(graph->event().register_receiver(on_seq_start)); + add_event_handler(graph->event().register_receiver(on_opr_start)); + add_event_handler(graph->event().register_receiver(on_opr_finish)); + add_event_handler(graph->event().register_receiver(on_before_kern)); + add_event_handler(graph->event().register_receiver(on_after_kern)); + add_event_handler(graph->event().register_receiver(on_graph_compile)); + add_event_handler(graph->event().register_receiver(on_seq_finish)); +} + +void ProfilerPlugin::init_seq(cg::AsyncExecutable *comp_seq) { + mgb_assert(m_opr_dict.empty()); + mgb_assert(m_var_dict.empty()); + comp_seq->iter_opr_seq([this](cg::OperatorNodeBase* opr){ + auto comp_nodes = get_opr_comp_node_set(opr); + mgb_assert(comp_nodes.size() == 1); + register_opr(opr); + for (auto&& input: opr->input()) { + if (m_var_dict.count(input) == 
0) { + register_var(input).is_const = true; + } else { + get_var_info(input).ref_cnt++; + } + } + for (auto&& output: opr->output()) { + register_var(output).is_const = false; + } + //TODO: check ref_cnt + return true; + }); +} + +ProfilerPlugin::OprInfo& ProfilerPlugin::register_opr(cg::OperatorNodeBase *opr) { + OprInfo info; + info.id = Profiler::next_id(); + auto params = std::make_shared>(); + auto params_json = opr->to_json(); + for (auto&& [k, v]: params_json->cast_final().get_impl()) { + params->insert({k.get_impl(), v->to_string()}); + } + info.params = std::move(params); + auto comp_nodes = cg::get_opr_comp_node_set(opr); + mgb_assert(comp_nodes.size() == 1, "only support single comp_node opr"); + info.comp_node = *comp_nodes.begin(); + return m_opr_dict.insert({opr, info}).first->second; +} + +ProfilerPlugin::VarInfo& ProfilerPlugin::register_var(cg::VarNode *var) { + auto info = std::make_unique(); + info->id = Profiler::next_id(); + info->is_const = false; + info->ref_cnt = 0; + info->rt_ref_cnt = 0; + return *m_var_dict.insert({var, std::move(info)}).first->second; +} + +ProfilerPlugin::OprInfo& ProfilerPlugin::get_opr_info(cg::OperatorNodeBase *opr) { + return m_opr_dict.at(opr); +} + +ProfilerPlugin::VarInfo& ProfilerPlugin::get_var_info(cg::VarNode *var) { + return *m_var_dict.at(var); +} + +} diff --git a/imperative/src/include/megbrain/imperative/profiler_plugin.h b/imperative/src/include/megbrain/imperative/profiler_plugin.h new file mode 100644 index 00000000..5ee28fe0 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/profiler_plugin.h @@ -0,0 +1,46 @@ +/** + * \file imperative/src/include/megbrain/imperative/profiler_plugin.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. 
+ * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include "megbrain/plugin/base.h" + +#include "megbrain/imperative/profiler.h" + +namespace mgb::imperative::interpreter::intl { + +class ProfilerPlugin: public PluginBase { +public: + struct OprInfo { + uint64_t id; + CompNode comp_node; + std::shared_ptr> params; + }; + + struct VarInfo { + uint64_t id; + bool is_const; + size_t ref_cnt; + std::atomic_size_t rt_ref_cnt; + }; +private: + std::unordered_map m_opr_dict; + std::unordered_map> m_var_dict; +public: + explicit ProfilerPlugin(cg::ComputingGraph* graph); + void init_seq(cg::AsyncExecutable* comp_seq); + OprInfo& register_opr(cg::OperatorNodeBase* opr); + VarInfo& register_var(cg::VarNode* var); + OprInfo& get_opr_info(cg::OperatorNodeBase* opr); + VarInfo& get_var_info(cg::VarNode* var); +}; + +}