From 6f581906a1df0694085e2cf4bc2afd8c1bd59182 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 4 Sep 2020 17:44:07 +0800 Subject: [PATCH] refactor(mge/profiler): refactor profiler GitOrigin-RevId: 279aa779a69e503e38e91ce06a973028824eb0aa --- imperative/python/megengine/utils/profiler.py | 138 ++++++++++++- imperative/python/src/helper.cpp | 7 +- imperative/python/src/utils.cpp | 38 ++-- imperative/src/impl/event_pool.cpp | 68 +++++++ imperative/src/impl/event_pool.h | 25 +++ imperative/src/impl/physical_tensor.cpp | 113 +++-------- imperative/src/impl/profiler.cpp | 221 +++++---------------- .../include/megbrain/imperative/function_hook.h | 55 +++++ .../src/include/megbrain/imperative/profiler.h | 68 +++++-- src/core/impl/comp_node/comp_node.cpp | 6 +- src/core/include/megbrain/comp_node.h | 3 +- 11 files changed, 426 insertions(+), 316 deletions(-) create mode 100644 imperative/src/impl/event_pool.cpp create mode 100644 imperative/src/impl/event_pool.h create mode 100644 imperative/src/include/megbrain/imperative/function_hook.h diff --git a/imperative/python/megengine/utils/profiler.py b/imperative/python/megengine/utils/profiler.py index 12dae2d2..2b805afb 100644 --- a/imperative/python/megengine/utils/profiler.py +++ b/imperative/python/megengine/utils/profiler.py @@ -6,24 +6,148 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -from typing import Optional +import base64 +import json +import os +from typing import List, Optional -from ..core._imperative_rt import ProfilerImpl +from ..core._imperative_rt import OperatorNodeConfig, ProfileEntry +from ..core._imperative_rt import ProfilerImpl as _Profiler from ..core._imperative_rt.imperative import sync +from ..core._imperative_rt.ops import CollectiveCommMode +from ..core.ops.builtin import GetVarShape class Profiler: - def __init__(self, path: Optional[str] = None): - self.impl = ProfilerImpl(path) + r""" + Profile graph execution in imperative mode. + + :type path: Optional[str] + :param path: default path for profiler to dump + + Examples: + + .. testcode:: + + import megengine as mge + import megengine.module as M + import megengine.utils.profiler.Profiler + + # With Learnable Parameters + for iter in range(0, 10): + # Only profile record of last iter would be saved + with Profiler("profile.json"): + # your code here + + # Then open the profile file in chrome timeline window + """ + + # see https://github.com/catapult-project/catapult/blob/master/tracing/tracing/base/color_scheme.html + GOOD = "good" + BAD = "bad" + TERRIBLE = "terrible" + + BLACK = "black" + GREY = "grey" + WHITE = "white" + YELLOW = "yellow" + OLIVE = "olive" + + def __init__(self, path: str = "profile.json"): + self._impl = _Profiler() + self._path = path + self._color_map = {} + self._type_map = { + OperatorNodeConfig: lambda x: self.print_opnode_config(x), + bytes: lambda x: base64.encodebytes(x).decode("ascii"), + CollectiveCommMode: lambda x: str(x), + } def __enter__(self): sync() - self.impl.enable() + self._impl.start() return self def __exit__(self, val, type, trace): sync() - self.impl.disable() + self._impl.stop() + if self._path is not None: + self.dump() + + def recolor(self, target: str, color: str): + self._color_map[target] = color + return self + + def print_opnode_config(self, config): + return self.make_dict( + name=config.name, dtype=config.dtype, comp_node_arr=config.comp_node_arr, + ) + + def fetch_attrs(self, op): + attrs = dir(op) + results = {} + for attr in attrs: + if attr.startswith("_"): + continue + value = op.__getattribute__(attr) + if callable(value): + continue + value_type = type(value) + if value_type in self._type_map: + value = self._type_map[value_type](value) + results[attr] = value + return results + + def make_dict(self, **kwargs): + unused_keys = [] + for k, v in kwargs.items(): + if v is None: + unused_keys.append(k) + for k in unused_keys: + del kwargs[k] + return kwargs def dump(self, path: Optional[str] = None): - self.impl.dump(path) + pid = os.getpid() + if path is None: + path = self._path + trace_events = [] + + def append_event(**kwargs): + trace_events.append(self.make_dict(**kwargs)) + + entries: List[ProfileEntry] = self._impl.dump() + + for id, entry in enumerate(entries): + op = entry.op + name = type(op).__name__ + host_begin, host_end = entry.host + device_list = entry.device_list + args = self.fetch_attrs(op) + args["__id__"] = "[{}]".format(id) + cname = self._color_map[name] if name in self._color_map else None + cat = name + for ts, ph in [(host_begin, "B"), (host_end, "E")]: + append_event( + name=name, + ph=ph, + ts=ts * 1000, + pid=pid, + tid="host", + args=args, + cname=cname, + cat=cat, + ) + for device, device_begin, device_end in device_list: + for ts, ph in [(device_begin(), "B"), (device_end(), "E")]: + append_event( + name=name, + ph=ph, + ts=ts * 1000, + pid=pid, + tid=str(device), + args=args, + cname=cname, + ) + with open(path, "w") as f: + json.dump(trace_events, f, indent=2) diff --git a/imperative/python/src/helper.cpp b/imperative/python/src/helper.cpp index bb14545e..0d4bd702 100644 --- a/imperative/python/src/helper.cpp +++ b/imperative/python/src/helper.cpp @@ -651,9 +651,14 @@ PyObject* npy::dtype_mgb2np(mgb::DType dtype) { // https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.PyArray_TypeObjectFromType // the following is equivalent to PyArray_TypeObjectFromType for built-in // types. + if(!dtype.valid()){ + Py_XINCREF(Py_None); + return Py_None; + } auto descr = dtype_mgb2np_descr(dtype); if (descr == nullptr) { - return nullptr; + Py_XINCREF(Py_None); + return Py_None; } if (dtype.has_param()) { return reinterpret_cast(descr.release()); diff --git a/imperative/python/src/utils.cpp b/imperative/python/src/utils.cpp index 0cda89c3..ed851af2 100644 --- a/imperative/python/src/utils.cpp +++ b/imperative/python/src/utils.cpp @@ -199,32 +199,22 @@ void init_utils(py::module m) { m.def("_get_device_count", &mgb::CompNode::get_device_count, "Get total number of specific devices on this system"); - using mgb::imperative::Profiler; + using mgb::imperative::ProfileEntry; - py::class_(m, "ProfilerImpl") + py::class_(m, "ProfileEntry") + .def_readwrite("op", &ProfileEntry::op) + .def_readwrite("host", &ProfileEntry::host) + .def_readwrite("device_list", &ProfileEntry::device_list); + + py::class_(m, "ProfilerImpl") .def(py::init<>()) - .def(py::init()) - .def("enable", - [](Profiler& profiler) -> Profiler& { - profiler.enable(); - return profiler; - }) - .def("disable", - [](Profiler& profiler) { - if (profiler.get_dump_count() == 0) { - profiler.dump(); - } - profiler.disable(); - }) - .def("dump", - [](Profiler& profiler, std::optional path) { - if (path.has_value()) { - profiler.dump(path.value()); - } else { - profiler.dump(); - } - }, - py::arg("path") = std::optional()); + .def("start", + [](mgb::imperative::Profiler& profiler) { profiler.start(); }) + .def("stop", + [](mgb::imperative::Profiler& profiler) { profiler.stop(); }) + .def("dump", [](mgb::imperative::Profiler& profiler) { + return profiler.get_profile(); + }); using mgb::imperative::TensorSanityCheck; py::class_(m, "TensorSanityCheckImpl") diff --git a/imperative/src/impl/event_pool.cpp b/imperative/src/impl/event_pool.cpp new file mode 100644 index 00000000..a5509d49 --- /dev/null +++ b/imperative/src/impl/event_pool.cpp @@ -0,0 +1,68 @@ +#include "./event_pool.h" + +namespace mgb { +namespace imperative { + +EventPool::EventPool(size_t flags) : m_flags{flags} {} + +EventPool& EventPool::with_timer() { + static Spinlock lock; + static std::unique_ptr ptr; + MGB_LOCK_GUARD(lock); + if (!ptr || ptr->is_finalized()) { + ptr.reset(new EventPool(CompNode::Event::NEED_TIMER)); + } + return *ptr; +} +EventPool& EventPool::without_timer() { + static Spinlock lock; + static std::unique_ptr ptr; + MGB_LOCK_GUARD(lock); + if (!ptr || ptr->is_finalized()) { + ptr.reset(new EventPool()); + } + return *ptr; +} +CompNode::Event* EventPool::alloc(CompNode cn) { + CompNode::EventPool* pool; + { + MGB_LOCK_GUARD(m_lock); + auto iter = m_cn2pool.find(cn); + if (iter == m_cn2pool.end()) { + iter = m_cn2pool + .emplace(std::piecewise_construct, + std::forward_as_tuple(cn), + std::forward_as_tuple(cn, m_flags)) + .first; + } + pool = &iter->second; + } + return pool->alloc(); +} +std::shared_ptr EventPool::alloc_shared(CompNode cn) { + auto* raw_event = alloc(cn); + return {raw_event, [this](CompNode::Event* event){ this->free(event); }}; +} +void EventPool::free(CompNode::Event* event) { + CompNode::EventPool* pool; + { + MGB_LOCK_GUARD(m_lock); + pool = &m_cn2pool.at(event->comp_node()); + } + pool->free(event); +} +std::shared_ptr EventPool::on_comp_node_finalize() { + MGB_LOCK_GUARD(m_lock); + for (auto&& i : m_cn2pool) { + i.second.assert_all_freed(); + } + return {}; +} +EventPool::~EventPool() { + for (auto&& i : m_cn2pool) { + i.second.assert_all_freed(); + } +} + +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/impl/event_pool.h b/imperative/src/impl/event_pool.h new file mode 100644 index 00000000..6263b010 --- /dev/null +++ b/imperative/src/impl/event_pool.h @@ -0,0 +1,25 @@ +#pragma once + +#include "megbrain/comp_node.h" + +namespace mgb { +namespace imperative { + +class EventPool : CompNodeDepedentObject { + CompNode::UnorderedMap m_cn2pool; + Spinlock m_lock; + size_t m_flags; + + EventPool(size_t flags = 0); + +public: + static EventPool& with_timer(); + static EventPool& without_timer(); + CompNode::Event* alloc(CompNode cn); + std::shared_ptr alloc_shared(CompNode cn); + void free(CompNode::Event* event); + std::shared_ptr on_comp_node_finalize(); + ~EventPool(); +}; +} // namespace imperative +} // namespace mgb diff --git a/imperative/src/impl/physical_tensor.cpp b/imperative/src/impl/physical_tensor.cpp index 0f0154f7..94a4219d 100644 --- a/imperative/src/impl/physical_tensor.cpp +++ b/imperative/src/impl/physical_tensor.cpp @@ -11,6 +11,7 @@ #include "megbrain/imperative.h" #include "megbrain/imperative/blob_manager.h" +#include "./event_pool.h" #include namespace mgb { @@ -18,86 +19,31 @@ namespace imperative { namespace { -class EventPool : CompNodeDepedentObject { - CompNode::UnorderedMap m_cn2pool; - Spinlock m_lock; - - EventPool() = default; -public: - static EventPool& inst() { - static Spinlock lock; - static std::unique_ptr ptr; - MGB_LOCK_GUARD(lock); - if (!ptr || ptr->is_finalized()) { - ptr.reset(new EventPool()); - } - return *ptr; - } - CompNode::Event* alloc(CompNode cn) { - CompNode::EventPool *pool; - { - MGB_LOCK_GUARD(m_lock); - auto iter = m_cn2pool.find(cn); - if (iter == m_cn2pool.end()) { - iter = m_cn2pool.emplace( - std::piecewise_construct, - std::forward_as_tuple(cn), - std::forward_as_tuple(cn)).first; - } - pool = &iter->second; - } - return pool->alloc(); - } - void free(CompNode::Event* event) { - CompNode::EventPool* pool; - { - MGB_LOCK_GUARD(m_lock); - pool = &m_cn2pool.at(event->comp_node()); - } - pool->free(event); - } - std::shared_ptr on_comp_node_finalize() override { - MGB_LOCK_GUARD(m_lock); - for (auto&& i : m_cn2pool) { - i.second.assert_all_freed(); - } - return {}; - } - ~EventPool() { - for (auto&& i : m_cn2pool) { - i.second.assert_all_freed(); - } - } -}; - class AsyncReleaser : public CompNodeDepedentObject { struct WaiterParam { CompNode cn; - CompNode::Event *event; + CompNode::Event* event; BlobPtr blob; HostTensorStorage::RawStorage storage; }; - class Waiter final: public AsyncQueueSC { - AsyncReleaser *m_par_releaser; - - public: - Waiter(AsyncReleaser *releaser): - m_par_releaser(releaser) - { + class Waiter final : public AsyncQueueSC { + AsyncReleaser* m_par_releaser; + + public: + Waiter(AsyncReleaser* releaser) : m_par_releaser(releaser) {} + + void process_one_task(WaiterParam& param) { + if (param.event->finished()) { + param.blob.reset(); + param.storage.reset(); + EventPool::without_timer().free(param.event); + return; } - void process_one_task(WaiterParam ¶m) { - if (param.event->finished()) { - param.blob.reset(); - param.storage.reset(); - EventPool::inst().free(param.event); - return; - } - - using namespace std::literals; - std::this_thread::sleep_for(1us); - add_task(std::move(param)); - } + using namespace std::literals; + std::this_thread::sleep_for(1us); + add_task(std::move(param)); + } }; Waiter m_waiter{this}; @@ -113,20 +59,17 @@ public: return &releaser; } - ~AsyncReleaser() { - m_waiter.wait_task_queue_empty(); - } + ~AsyncReleaser() { m_waiter.wait_task_queue_empty(); } - void add(BlobPtr blob, CompNode cn) { - add(cn, std::move(blob), {}); - } + void add(BlobPtr blob, CompNode cn) { add(cn, std::move(blob), {}); } void add(const HostTensorND& hv) { add(hv.comp_node(), {}, hv.storage().raw_storage()); } - void add(CompNode cn, BlobPtr blob, HostTensorStorage::RawStorage storage = {}) { - auto event = EventPool::inst().alloc(cn); + void add(CompNode cn, BlobPtr blob, + HostTensorStorage::RawStorage storage = {}) { + auto event = EventPool::without_timer().alloc(cn); event->record(); m_waiter.add_task({cn, event, std::move(blob), std::move(storage)}); } @@ -290,10 +233,10 @@ struct MultiCNConstTensorCache : CompNodeDepedentObject { MultiCNConstTensorCache const_tensor_cache; -} // namespace +} // namespace void EventDeleter::operator()(CompNode::Event* event) { - EventPool::inst().free(event); + EventPool::without_timer().free(event); } Blob::Blob(const DeviceTensorStorage& s): @@ -373,7 +316,7 @@ void Tensor::fetch_value() { MGB_LOCK_GUARD(m_mtx); if (m_value.empty()) { m_value.copy_from(dev_tensor()); - m_value_ready.reset(EventPool::inst().alloc(comp_node())); + m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); m_value_ready->record(); } } @@ -421,7 +364,7 @@ CompNode::Event* Tensor::get_or_create_event() { return e; } -} // namespace imperative -} // namespace mgb +} // namespace imperative +} // namespace mgb // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/imperative/src/impl/profiler.cpp b/imperative/src/impl/profiler.cpp index 008fcc6f..4987ce53 100644 --- a/imperative/src/impl/profiler.cpp +++ b/imperative/src/impl/profiler.cpp @@ -11,63 +11,18 @@ #include "megbrain/imperative/profiler.h" -#if defined(_MSC_VER) || defined(WIN32) -#include -#define getpid GetCurrentProcessId -#else -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#endif - #include #include "megbrain/imperative/ops/opr_attr.h" #include "megbrain/imperative/physical_tensor.h" +#include "./event_pool.h" #include "./op_trait.h" namespace mgb { namespace imperative { -class OpDefInfo{ -public: - size_t id; - std::string name; -}; - -class ProfilerEntry { -public: - ProfilerEntry(size_t index, Profiler::EventKind type, std::unique_ptr device) - : index{index}, type{type}, device{std::move(device)}{ - } - ProfilerEntry(size_t index, Profiler::EventKind type, double host): index{index}, type{type}, host{host}{ - } - size_t index; - Profiler::EventKind type; - std::unique_ptr device = nullptr; - double host = 0; -}; - -class ProfilerPrivate { -public: - std::vector op_list; - std::vector entry_list; - std::vector> event_list; - std::vector>> - hook_list; - ThinHashMap> - comp_node_begin_map; - ThinHashMap comp_node_end_map; - RealTimer timer; - size_t dump_count = 0; - bool enabled = false; - std::string path; -}; - namespace { CompNode::UnorderedSet collect_comp_nodes( const OpDef& def, const SmallVector& inputs) { @@ -80,145 +35,65 @@ CompNode::UnorderedSet collect_comp_nodes( } return comp_nodes; } -} // namespace - -std::unique_ptr Profiler::create_event(CompNode comp_node){ - auto event = comp_node.create_event(CompNode::Event::NEED_TIMER); - event->record(); - auto& [begin, time] = m_private->comp_node_begin_map[comp_node]; - if (begin == nullptr) { - begin = event.get(); - time = m_private->timer.get_msecs(); - } - return event; -} - -double Profiler::get_host_time_now(){ - return m_private->timer.get_msecs(); -} - -double Profiler::get_device_time(CompNode::Event& event) { - auto [base_event, host_time] = - m_private->comp_node_begin_map[event.comp_node()]; - if (base_event == &event) { - return host_time; - } else { - return host_time + base_event->elapsed_time_until(event) * 1000; - } -} -size_t Profiler::get_dump_count(){ - return m_private->dump_count; -} - -Profiler::Profiler() { - m_private = std::make_unique(); -} - -Profiler::Profiler(const std::string& path): Profiler() { - m_private->path = path; -} +} // namespace -void Profiler::enable() { - m_private->enabled = true; - CompNode::sync_all(); - OpTrait::for_each_trait([this](OpTrait& trait) { - auto backup = std::make_unique( - std::move(trait.apply_on_physical_tensor)); - trait.apply_on_physical_tensor = - [this, backup = backup.get()] ( - const OpDef& def, - const SmallVector& inputs){ - size_t index = m_private->op_list.size(); - std::string name = "[" + std::to_string(index) + "]" + print_op(def); - m_private->op_list.push_back({reinterpret_cast(&def), name}); - m_private->entry_list.emplace_back(index, OprBegin, get_host_time_now()); - auto&& comp_nodes = collect_comp_nodes(def, inputs); - for (auto&& comp_node : comp_nodes) { - m_private->entry_list.emplace_back(index, OprBegin, create_event(comp_node)); - } - auto output = (*backup)(def, inputs); - for (auto&& comp_node : comp_nodes) { - m_private->entry_list.emplace_back(index, OprEnd, create_event(comp_node)); - } - m_private->entry_list.emplace_back(index, OprEnd, get_host_time_now()); - return output; - }; - m_private->hook_list.push_back({&trait, std::move(backup)}); +void DeviceTimer::reset(thin_function host_timer) { + CompNode::foreach ([this, host_timer](CompNode device) { + auto base_event = EventPool::with_timer().alloc_shared(device); + base_event->record(); + m_base_event_table[device] = {std::move(base_event), host_timer()}; }); } -void Profiler::disable() { - for (auto&& hook : m_private->hook_list) { - std::get<0>(hook)->apply_on_physical_tensor = - std::move(*std::get<1>(hook)); - } - m_private->hook_list.clear(); - m_private->enabled = false; -} - -Profiler::~Profiler() { -} - -void Profiler::dump(){ - dump(m_private->path); +thin_function DeviceTimer::get_device_time(CompNode device) { + auto event = EventPool::with_timer().alloc_shared(device); + event->record(); + auto base = m_base_event_table[device]; + return [base, event] { + auto [base_event, host_time] = base; + //TODO: sync once for each compnode + event->host_wait(); + return base_event->elapsed_time_until(*event) * 1000 + host_time; + }; } -void Profiler::dump(const std::string& path) { - using namespace json; - auto obj = json::Object::make(); - if (!(*obj)["traceEvents"]) { - (*obj)["traceEvents"] = Array::make(); - } - auto& trace_events = (*obj)["traceEvents"]->cast_final(); - for (auto&& entry : m_private->entry_list) { - auto trace_event_ptr = Object::make(); - auto& trace_event = *trace_event_ptr; - std::string name; - size_t id; - int pid; - std::string tid; - double ts; - const char* ph; - name = m_private->op_list[entry.index].name; - id = entry.index; - pid = getpid(); - if (entry.device) { - entry.device->host_wait(); - ts = get_device_time(*entry.device); - tid = entry.device->comp_node().to_string(); - } else { - ts = entry.host; - tid = "host"; - } - switch (entry.type) { - case OprBegin: { - ph = "B"; - break; +void Profiler::start() { + m_host_timer.reset(); + m_device_timer.reset([&]{ return m_host_timer.get_msecs();} ); + OpTrait::for_each_trait([this](OpTrait& trait) { + FunctionHooker hooker{&trait.apply_on_physical_tensor}; + hooker.apply_hook([this](auto&& apply, const OpDef& def, + const SmallVector& inputs) { + ProfileEntry entry; + entry.op = def.copy(); + double host_begin = m_host_timer.get_msecs(); + auto&& comp_nodes = collect_comp_nodes(def, inputs); + for (auto&& comp_node : comp_nodes) { + entry.device_list.push_back( + {comp_node, + m_device_timer.get_device_time(comp_node), + {}}); } - case OprEnd: { - ph = "E"; - break; + auto outputs = apply(def, inputs); + for (auto& [cn, dev_begin, dev_end] : entry.device_list) { + MGB_MARK_USED_VAR(cn); + MGB_MARK_USED_VAR(dev_begin); + dev_end = m_device_timer.get_device_time(cn); } - } - trace_event["name"] = String::make(name); - trace_event["id"] = Number::make(id); - trace_event["pid"] = Number::make(pid); - trace_event["tid"] = String::make(tid); - trace_event["ts"] = Number::make(ts * 1000); - trace_event["ph"] = String::make(ph); - trace_events.add(std::move(trace_event_ptr)); - } - obj->writeto_fpath(path.empty() ? path : m_private->path); - m_private->dump_count++; + entry.host = {host_begin, m_host_timer.get_msecs()}; + m_profile->push_back(std::move(entry)); + return outputs; + }); + m_hooker_list.push_back(std::move(hooker)); + }); } -std::string Profiler::print_op(const OpDef& def){ - auto* opr_attr = def.try_cast_final(); - if(opr_attr){ - return std::string("OprAttr:") + opr_attr->type; +void Profiler::stop() { + m_hooker_list.clear(); + for (auto& entry : *m_profile) { + entry.wait_device(); } - return def.dyn_typeinfo()->name; } } // namespace imperative diff --git a/imperative/src/include/megbrain/imperative/function_hook.h b/imperative/src/include/megbrain/imperative/function_hook.h new file mode 100644 index 00000000..64582f11 --- /dev/null +++ b/imperative/src/include/megbrain/imperative/function_hook.h @@ -0,0 +1,55 @@ +/** + * \file imperative/src/include/megbrain/imperative/function_hook.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2020 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include "megbrain/utils/thin/function.h" + +namespace mgb { +namespace imperative { +template +class FunctionHooker; + +template +class FunctionHooker { +public: + using FunctionType = thin_function; + using HookType = thin_function; + explicit FunctionHooker(FunctionType* fptr) : m_fptr{fptr} {} + +public: + FunctionHooker& apply_hook(HookType&& hook) { + if (!m_backup) { + FunctionType* backup = new FunctionType(*m_fptr); + std::function restorer = + [fptr = m_fptr](FunctionType* bkp) -> void { + *fptr = *bkp; + delete bkp; + }; + m_backup = decltype(m_backup)(backup, restorer); + } + *m_fptr = [func = *m_fptr, hook](TArgs&&... args) -> TRet { + return hook(func, std::forward(args)...); + }; + return *this; + } + +private: + FunctionType* m_fptr; + std::unique_ptr> m_backup; +}; + +template +FunctionHooker(thin_function* f) + ->FunctionHooker; +} // namespace imperative + +} // namespace mgb diff --git a/imperative/src/include/megbrain/imperative/profiler.h b/imperative/src/include/megbrain/imperative/profiler.h index a0a278e8..bece8226 100644 --- a/imperative/src/include/megbrain/imperative/profiler.h +++ b/imperative/src/include/megbrain/imperative/profiler.h @@ -11,6 +11,8 @@ #pragma once +#include + #include "megbrain/comp_node.h" #include "megbrain/graph/event.h" #include "megbrain/utils/json.h" @@ -18,37 +20,59 @@ #include "megbrain/imperative/op_def.h" +#include "megbrain/imperative/function_hook.h" + namespace mgb { namespace imperative { -class ProfilerPrivate; +struct ProfileEntry{ + using TimeClosure = std::function; + std::shared_ptr op; + std::tuple host; + std::vector> device_list; + void wait_device(){ + for(auto& [cn, begin, end]: device_list){ + MGB_MARK_USED_VAR(cn); + begin = [begin=begin()]{ return begin; }; + end = [end = end()]{ return end; }; + } + } +}; + +using Profile = std::vector; -using OpDefPrinter = thin_function; +class DeviceTimer { +public: + using SharedEvent = std::shared_ptr; + DeviceTimer() = default; + void reset(thin_function host_timer); + thin_function get_device_time(CompNode device); -class Profiler { private: - std::unique_ptr m_private; + CompNode::UnorderedMap> m_base_event_table; +}; +class Profiler { public: - enum EventKind { OprBegin, OprEnd }; + Profiler(Profile* profile = nullptr) { + if (!profile) { + m_owned_profile = std::make_unique(); + profile = m_owned_profile.get(); + } + m_profile = profile; + } + void start(); + void stop(); + Profile& get_profile() { return *m_profile; } -public: - Profiler(); - Profiler(const std::string& path); - ~Profiler(); - void enable(); - void disable(); - void dump(); - void dump(const std::string& path); - void record_host(size_t id, std::string name, EventKind type, - double host_time); - void record_device(size_t id, std::string name, EventKind type, - double host_time, CompNode comp_node); - double get_device_time(CompNode::Event& event); - size_t get_dump_count(); - std::unique_ptr create_event(CompNode comp_node); - double get_host_time_now(); - std::string print_op(const OpDef& def); +private: + DeviceTimer m_device_timer; + RealTimer m_host_timer; + Profile* m_profile; + std::unique_ptr m_owned_profile; + std::vector> + m_hooker_list; }; + } // namespace imperative } // namespace mgb diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp index f302bf8d..d5eb3134 100644 --- a/src/core/impl/comp_node/comp_node.cpp +++ b/src/core/impl/comp_node/comp_node.cpp @@ -89,8 +89,8 @@ namespace { /* ==================== EventPool ==================== */ -CompNode::EventPool::EventPool(CompNode cn): - m_cn{cn} +CompNode::EventPool::EventPool(CompNode cn, size_t flags): + m_cn{cn}, m_flags{flags} { } @@ -105,7 +105,7 @@ CompNode::Event* CompNode::EventPool::alloc() { m_free.pop_back(); return rst; } - m_allocated.push_back(m_cn.create_event()); + m_allocated.push_back(m_cn.create_event(m_flags)); return m_allocated.back().get(); } diff --git a/src/core/include/megbrain/comp_node.h b/src/core/include/megbrain/comp_node.h index 8b71c389..b9fcee3f 100644 --- a/src/core/include/megbrain/comp_node.h +++ b/src/core/include/megbrain/comp_node.h @@ -643,9 +643,10 @@ class CompNode::EventPool { std::vector> m_allocated; std::vector m_free; Spinlock m_lock; + size_t m_flags; public: - explicit EventPool(CompNode cn); + explicit EventPool(CompNode cn, size_t flags = 0); ~EventPool(); CompNode::Event* alloc();