@@ -6,24 +6,148 @@ | |||
# Unless required by applicable law or agreed to in writing, | |||
# software distributed under the License is distributed on an | |||
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
from typing import Optional | |||
import base64 | |||
import json | |||
import os | |||
from typing import List, Optional | |||
from ..core._imperative_rt import ProfilerImpl | |||
from ..core._imperative_rt import OperatorNodeConfig, ProfileEntry | |||
from ..core._imperative_rt import ProfilerImpl as _Profiler | |||
from ..core._imperative_rt.imperative import sync | |||
from ..core._imperative_rt.ops import CollectiveCommMode | |||
from ..core.ops.builtin import GetVarShape | |||
class Profiler: | |||
def __init__(self, path: Optional[str] = None): | |||
self.impl = ProfilerImpl(path) | |||
r""" | |||
Profile graph execution in imperative mode. | |||
:type path: str
:param path: default path that the profiler dumps the profiling results to
Examples: | |||
.. testcode:: | |||
import megengine as mge | |||
import megengine.module as M | |||
from megengine.utils.profiler import Profiler
for step in range(10):
    # only the profile record of the last iteration is saved
    with Profiler("profile.json"):
        pass  # your code here
# then open the dumped file in Chrome's chrome://tracing window
""" | |||
# see https://github.com/catapult-project/catapult/blob/master/tracing/tracing/base/color_scheme.html | |||
GOOD = "good" | |||
BAD = "bad" | |||
TERRIBLE = "terrible" | |||
BLACK = "black" | |||
GREY = "grey" | |||
WHITE = "white" | |||
YELLOW = "yellow" | |||
OLIVE = "olive" | |||
def __init__(self, path: str = "profile.json"): | |||
self._impl = _Profiler() | |||
self._path = path | |||
self._color_map = {} | |||
self._type_map = { | |||
OperatorNodeConfig: lambda x: self.print_opnode_config(x), | |||
bytes: lambda x: base64.encodebytes(x).decode("ascii"), | |||
CollectiveCommMode: lambda x: str(x), | |||
} | |||
def __enter__(self): | |||
sync() | |||
self.impl.enable() | |||
self._impl.start() | |||
return self | |||
def __exit__(self, exc_type, exc_value, traceback):
sync() | |||
self.impl.disable() | |||
self._impl.stop() | |||
if self._path is not None: | |||
self.dump() | |||
def recolor(self, target: str, color: str): | |||
self._color_map[target] = color | |||
return self | |||
def print_opnode_config(self, config): | |||
return self.make_dict( | |||
name=config.name, dtype=config.dtype, comp_node_arr=config.comp_node_arr, | |||
) | |||
def fetch_attrs(self, op): | |||
attrs = dir(op) | |||
results = {} | |||
for attr in attrs: | |||
if attr.startswith("_"): | |||
continue | |||
value = getattr(op, attr)
if callable(value): | |||
continue | |||
value_type = type(value) | |||
if value_type in self._type_map: | |||
value = self._type_map[value_type](value) | |||
results[attr] = value | |||
return results | |||
def make_dict(self, **kwargs): | |||
unused_keys = [] | |||
for k, v in kwargs.items(): | |||
if v is None: | |||
unused_keys.append(k) | |||
for k in unused_keys: | |||
del kwargs[k] | |||
return kwargs | |||
def dump(self, path: Optional[str] = None): | |||
self.impl.dump(path) | |||
pid = os.getpid() | |||
if path is None: | |||
path = self._path | |||
trace_events = [] | |||
def append_event(**kwargs): | |||
trace_events.append(self.make_dict(**kwargs)) | |||
entries: List[ProfileEntry] = self._impl.dump() | |||
for id, entry in enumerate(entries): | |||
op = entry.op | |||
name = type(op).__name__ | |||
host_begin, host_end = entry.host | |||
device_list = entry.device_list | |||
args = self.fetch_attrs(op) | |||
args["__id__"] = "[{}]".format(id) | |||
cname = self._color_map.get(name)
cat = name | |||
for ts, ph in [(host_begin, "B"), (host_end, "E")]: | |||
append_event( | |||
name=name, | |||
ph=ph, | |||
ts=ts * 1000, | |||
pid=pid, | |||
tid="host", | |||
args=args, | |||
cname=cname, | |||
cat=cat, | |||
) | |||
for device, device_begin, device_end in device_list: | |||
for ts, ph in [(device_begin(), "B"), (device_end(), "E")]: | |||
append_event( | |||
name=name, | |||
ph=ph, | |||
ts=ts * 1000, | |||
pid=pid, | |||
tid=str(device), | |||
args=args, | |||
cname=cname, | |||
) | |||
with open(path, "w") as f: | |||
json.dump(trace_events, f, indent=2) |
@@ -651,9 +651,14 @@ PyObject* npy::dtype_mgb2np(mgb::DType dtype) { | |||
// https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.PyArray_TypeObjectFromType | |||
// the following is equivalent to PyArray_TypeObjectFromType for built-in | |||
// types. | |||
if(!dtype.valid()){ | |||
Py_XINCREF(Py_None); | |||
return Py_None; | |||
} | |||
auto descr = dtype_mgb2np_descr(dtype); | |||
if (descr == nullptr) { | |||
return nullptr; | |||
Py_XINCREF(Py_None); | |||
return Py_None; | |||
} | |||
if (dtype.has_param()) { | |||
return reinterpret_cast<PyObject*>(descr.release()); | |||
@@ -199,32 +199,22 @@ void init_utils(py::module m) { | |||
m.def("_get_device_count", &mgb::CompNode::get_device_count, | |||
"Get total number of specific devices on this system"); | |||
using mgb::imperative::Profiler; | |||
using mgb::imperative::ProfileEntry; | |||
py::class_<Profiler>(m, "ProfilerImpl") | |||
py::class_<ProfileEntry>(m, "ProfileEntry") | |||
.def_readwrite("op", &ProfileEntry::op) | |||
.def_readwrite("host", &ProfileEntry::host) | |||
.def_readwrite("device_list", &ProfileEntry::device_list); | |||
py::class_<mgb::imperative::Profiler>(m, "ProfilerImpl") | |||
.def(py::init<>()) | |||
.def(py::init<const std::string&>()) | |||
.def("enable", | |||
[](Profiler& profiler) -> Profiler& { | |||
profiler.enable(); | |||
return profiler; | |||
}) | |||
.def("disable", | |||
[](Profiler& profiler) { | |||
if (profiler.get_dump_count() == 0) { | |||
profiler.dump(); | |||
} | |||
profiler.disable(); | |||
}) | |||
.def("dump", | |||
[](Profiler& profiler, std::optional<std::string> path) { | |||
if (path.has_value()) { | |||
profiler.dump(path.value()); | |||
} else { | |||
profiler.dump(); | |||
} | |||
}, | |||
py::arg("path") = std::optional<std::string>()); | |||
.def("start", | |||
[](mgb::imperative::Profiler& profiler) { profiler.start(); }) | |||
.def("stop", | |||
[](mgb::imperative::Profiler& profiler) { profiler.stop(); }) | |||
.def("dump", [](mgb::imperative::Profiler& profiler) { | |||
return profiler.get_profile(); | |||
}); | |||
using mgb::imperative::TensorSanityCheck; | |||
py::class_<TensorSanityCheck>(m, "TensorSanityCheckImpl") | |||
@@ -0,0 +1,68 @@ | |||
#include "./event_pool.h" | |||
namespace mgb { | |||
namespace imperative { | |||
EventPool::EventPool(size_t flags) : m_flags{flags} {} | |||
EventPool& EventPool::with_timer() { | |||
static Spinlock lock; | |||
static std::unique_ptr<EventPool> ptr; | |||
MGB_LOCK_GUARD(lock); | |||
if (!ptr || ptr->is_finalized()) { | |||
ptr.reset(new EventPool(CompNode::Event::NEED_TIMER)); | |||
} | |||
return *ptr; | |||
} | |||
EventPool& EventPool::without_timer() { | |||
static Spinlock lock; | |||
static std::unique_ptr<EventPool> ptr; | |||
MGB_LOCK_GUARD(lock); | |||
if (!ptr || ptr->is_finalized()) { | |||
ptr.reset(new EventPool()); | |||
} | |||
return *ptr; | |||
} | |||
CompNode::Event* EventPool::alloc(CompNode cn) { | |||
CompNode::EventPool* pool; | |||
{ | |||
MGB_LOCK_GUARD(m_lock); | |||
auto iter = m_cn2pool.find(cn); | |||
if (iter == m_cn2pool.end()) { | |||
iter = m_cn2pool | |||
.emplace(std::piecewise_construct, | |||
std::forward_as_tuple(cn), | |||
std::forward_as_tuple(cn, m_flags)) | |||
.first; | |||
} | |||
pool = &iter->second; | |||
} | |||
return pool->alloc(); | |||
} | |||
std::shared_ptr<CompNode::Event> EventPool::alloc_shared(CompNode cn) { | |||
auto* raw_event = alloc(cn); | |||
return {raw_event, [this](CompNode::Event* event){ this->free(event); }}; | |||
} | |||
void EventPool::free(CompNode::Event* event) { | |||
CompNode::EventPool* pool; | |||
{ | |||
MGB_LOCK_GUARD(m_lock); | |||
pool = &m_cn2pool.at(event->comp_node()); | |||
} | |||
pool->free(event); | |||
} | |||
std::shared_ptr<void> EventPool::on_comp_node_finalize() { | |||
MGB_LOCK_GUARD(m_lock); | |||
for (auto&& i : m_cn2pool) { | |||
i.second.assert_all_freed(); | |||
} | |||
return {}; | |||
} | |||
EventPool::~EventPool() { | |||
for (auto&& i : m_cn2pool) { | |||
i.second.assert_all_freed(); | |||
} | |||
} | |||
} // namespace imperative | |||
} // namespace mgb |
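For reference (not part of the patch): the timer-enabled pooled events above are what DeviceTimer in profiler.cpp builds on. An event recorded at a known host time serves as a base, and elapsed_time_until() against a later event yields a device timestamp expressed on the host clock. A minimal sketch of that pattern, using only the CompNode::Event calls already appearing in this patch; the helper name and the std::function host-clock parameter are illustrative:

// Sketch only: mapping a device-side timestamp onto the host clock with two
// pooled, timer-enabled events (the same pattern DeviceTimer uses).
#include <functional>
#include "./event_pool.h"
namespace mgb {
namespace imperative {
double device_time_on_host_clock(CompNode cn,
                                 std::function<double()> host_time_ms) {
    // Base event: recorded now and paired with the current host time (ms).
    auto base = EventPool::with_timer().alloc_shared(cn);
    base->record();
    double host_base = host_time_ms();
    // ... device work would be issued here ...
    // Later event: its offset from the base (seconds), converted to ms and
    // added to the host base time.
    auto later = EventPool::with_timer().alloc_shared(cn);
    later->record();
    later->host_wait();  // make sure the event has completed before reading it
    return host_base + base->elapsed_time_until(*later) * 1000;
}
}  // namespace imperative
}  // namespace mgb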
@@ -0,0 +1,25 @@ | |||
#pragma once | |||
#include "megbrain/comp_node.h" | |||
namespace mgb { | |||
namespace imperative { | |||
class EventPool : CompNodeDepedentObject { | |||
CompNode::UnorderedMap<CompNode::EventPool> m_cn2pool; | |||
Spinlock m_lock; | |||
size_t m_flags; | |||
EventPool(size_t flags = 0); | |||
public: | |||
static EventPool& with_timer(); | |||
static EventPool& without_timer(); | |||
CompNode::Event* alloc(CompNode cn); | |||
std::shared_ptr<CompNode::Event> alloc_shared(CompNode cn); | |||
void free(CompNode::Event* event); | |||
std::shared_ptr<void> on_comp_node_finalize() override; | |||
~EventPool(); | |||
}; | |||
} // namespace imperative | |||
} // namespace mgb |
@@ -11,6 +11,7 @@ | |||
#include "megbrain/imperative.h" | |||
#include "megbrain/imperative/blob_manager.h" | |||
#include "./event_pool.h" | |||
#include <mutex> | |||
namespace mgb { | |||
@@ -18,86 +19,31 @@ namespace imperative { | |||
namespace { | |||
class EventPool : CompNodeDepedentObject { | |||
CompNode::UnorderedMap<CompNode::EventPool> m_cn2pool; | |||
Spinlock m_lock; | |||
EventPool() = default; | |||
public: | |||
static EventPool& inst() { | |||
static Spinlock lock; | |||
static std::unique_ptr<EventPool> ptr; | |||
MGB_LOCK_GUARD(lock); | |||
if (!ptr || ptr->is_finalized()) { | |||
ptr.reset(new EventPool()); | |||
} | |||
return *ptr; | |||
} | |||
CompNode::Event* alloc(CompNode cn) { | |||
CompNode::EventPool *pool; | |||
{ | |||
MGB_LOCK_GUARD(m_lock); | |||
auto iter = m_cn2pool.find(cn); | |||
if (iter == m_cn2pool.end()) { | |||
iter = m_cn2pool.emplace( | |||
std::piecewise_construct, | |||
std::forward_as_tuple(cn), | |||
std::forward_as_tuple(cn)).first; | |||
} | |||
pool = &iter->second; | |||
} | |||
return pool->alloc(); | |||
} | |||
void free(CompNode::Event* event) { | |||
CompNode::EventPool* pool; | |||
{ | |||
MGB_LOCK_GUARD(m_lock); | |||
pool = &m_cn2pool.at(event->comp_node()); | |||
} | |||
pool->free(event); | |||
} | |||
std::shared_ptr<void> on_comp_node_finalize() override { | |||
MGB_LOCK_GUARD(m_lock); | |||
for (auto&& i : m_cn2pool) { | |||
i.second.assert_all_freed(); | |||
} | |||
return {}; | |||
} | |||
~EventPool() { | |||
for (auto&& i : m_cn2pool) { | |||
i.second.assert_all_freed(); | |||
} | |||
} | |||
}; | |||
class AsyncReleaser : public CompNodeDepedentObject { | |||
struct WaiterParam { | |||
CompNode cn; | |||
CompNode::Event *event; | |||
CompNode::Event* event; | |||
BlobPtr blob; | |||
HostTensorStorage::RawStorage storage; | |||
}; | |||
class Waiter final: public AsyncQueueSC<WaiterParam, Waiter> { | |||
AsyncReleaser *m_par_releaser; | |||
public: | |||
Waiter(AsyncReleaser *releaser): | |||
m_par_releaser(releaser) | |||
{ | |||
class Waiter final : public AsyncQueueSC<WaiterParam, Waiter> { | |||
AsyncReleaser* m_par_releaser; | |||
public: | |||
Waiter(AsyncReleaser* releaser) : m_par_releaser(releaser) {} | |||
void process_one_task(WaiterParam& param) { | |||
if (param.event->finished()) { | |||
param.blob.reset(); | |||
param.storage.reset(); | |||
EventPool::without_timer().free(param.event); | |||
return; | |||
} | |||
void process_one_task(WaiterParam ¶m) { | |||
if (param.event->finished()) { | |||
param.blob.reset(); | |||
param.storage.reset(); | |||
EventPool::inst().free(param.event); | |||
return; | |||
} | |||
using namespace std::literals; | |||
std::this_thread::sleep_for(1us); | |||
add_task(std::move(param)); | |||
} | |||
using namespace std::literals; | |||
std::this_thread::sleep_for(1us); | |||
add_task(std::move(param)); | |||
} | |||
}; | |||
Waiter m_waiter{this}; | |||
@@ -113,20 +59,17 @@ public: | |||
return &releaser; | |||
} | |||
~AsyncReleaser() { | |||
m_waiter.wait_task_queue_empty(); | |||
} | |||
~AsyncReleaser() { m_waiter.wait_task_queue_empty(); } | |||
void add(BlobPtr blob, CompNode cn) { | |||
add(cn, std::move(blob), {}); | |||
} | |||
void add(BlobPtr blob, CompNode cn) { add(cn, std::move(blob), {}); } | |||
void add(const HostTensorND& hv) { | |||
add(hv.comp_node(), {}, hv.storage().raw_storage()); | |||
} | |||
void add(CompNode cn, BlobPtr blob, HostTensorStorage::RawStorage storage = {}) { | |||
auto event = EventPool::inst().alloc(cn); | |||
void add(CompNode cn, BlobPtr blob, | |||
HostTensorStorage::RawStorage storage = {}) { | |||
auto event = EventPool::without_timer().alloc(cn); | |||
event->record(); | |||
m_waiter.add_task({cn, event, std::move(blob), std::move(storage)}); | |||
} | |||
@@ -290,10 +233,10 @@ struct MultiCNConstTensorCache : CompNodeDepedentObject { | |||
MultiCNConstTensorCache const_tensor_cache; | |||
} // namespace | |||
} // namespace | |||
void EventDeleter::operator()(CompNode::Event* event) { | |||
EventPool::inst().free(event); | |||
EventPool::without_timer().free(event); | |||
} | |||
Blob::Blob(const DeviceTensorStorage& s): | |||
@@ -373,7 +316,7 @@ void Tensor::fetch_value() { | |||
MGB_LOCK_GUARD(m_mtx); | |||
if (m_value.empty()) { | |||
m_value.copy_from(dev_tensor()); | |||
m_value_ready.reset(EventPool::inst().alloc(comp_node())); | |||
m_value_ready.reset(EventPool::without_timer().alloc(comp_node())); | |||
m_value_ready->record(); | |||
} | |||
} | |||
@@ -421,7 +364,7 @@ CompNode::Event* Tensor::get_or_create_event() { | |||
return e; | |||
} | |||
} // namespace imperative | |||
} // namespace mgb | |||
} // namespace imperative | |||
} // namespace mgb | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
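AsyncReleaser above defers freeing blobs and host storages until the recorded event reports completion, re-queuing the task otherwise. A standalone sketch (not part of the patch) of that check-or-requeue idea, with a plain flag standing in for CompNode::Event::finished() and a shared_ptr standing in for the blob; all names are illustrative:

// Check-or-requeue release, the same idea as AsyncReleaser's Waiter.
#include <deque>
#include <functional>
#include <iostream>
#include <memory>

struct Task {
    std::function<bool()> finished;  // stands in for event->finished()
    std::shared_ptr<int> resource;   // stands in for the Blob / host storage
};

int main() {
    std::deque<Task> queue;
    bool done = false;
    queue.push_back({[&done] { return done; }, std::make_shared<int>(42)});

    // Polling loop; the real Waiter sleeps 1us between attempts instead.
    while (!queue.empty()) {
        Task task = std::move(queue.front());
        queue.pop_front();
        if (task.finished()) {
            task.resource.reset();  // safe to release now
            continue;
        }
        done = true;                       // pretend the device completed
        queue.push_back(std::move(task));  // re-queue and try again later
    }
    std::cout << "all resources released\n";
    return 0;
}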
@@ -11,63 +11,18 @@ | |||
#include "megbrain/imperative/profiler.h" | |||
#if defined(_MSC_VER) || defined(WIN32) | |||
#include <windows.h> | |||
#define getpid GetCurrentProcessId | |||
#else | |||
#include <sys/unistd.h> | |||
#endif | |||
#if defined(__APPLE__) || defined(__MACOSX) | |||
#include <unistd.h> | |||
#endif | |||
#include <variant> | |||
#include "megbrain/imperative/ops/opr_attr.h" | |||
#include "megbrain/imperative/physical_tensor.h" | |||
#include "./event_pool.h" | |||
#include "./op_trait.h" | |||
namespace mgb { | |||
namespace imperative { | |||
class OpDefInfo{ | |||
public: | |||
size_t id; | |||
std::string name; | |||
}; | |||
class ProfilerEntry { | |||
public: | |||
ProfilerEntry(size_t index, Profiler::EventKind type, std::unique_ptr<CompNode::Event> device) | |||
: index{index}, type{type}, device{std::move(device)}{ | |||
} | |||
ProfilerEntry(size_t index, Profiler::EventKind type, double host): index{index}, type{type}, host{host}{ | |||
} | |||
size_t index; | |||
Profiler::EventKind type; | |||
std::unique_ptr<CompNode::Event> device = nullptr; | |||
double host = 0; | |||
}; | |||
class ProfilerPrivate { | |||
public: | |||
std::vector<OpDefInfo> op_list; | |||
std::vector<ProfilerEntry> entry_list; | |||
std::vector<std::unique_ptr<CompNode::Event>> event_list; | |||
std::vector<std::tuple<OpTrait*, std::unique_ptr<ApplyOnPhysicalTensor>>> | |||
hook_list; | |||
ThinHashMap<CompNode, std::tuple<CompNode::Event*, double>> | |||
comp_node_begin_map; | |||
ThinHashMap<CompNode, CompNode::Event*> comp_node_end_map; | |||
RealTimer timer; | |||
size_t dump_count = 0; | |||
bool enabled = false; | |||
std::string path; | |||
}; | |||
namespace { | |||
CompNode::UnorderedSet collect_comp_nodes( | |||
const OpDef& def, const SmallVector<TensorPtr>& inputs) { | |||
@@ -80,145 +35,65 @@ CompNode::UnorderedSet collect_comp_nodes( | |||
} | |||
return comp_nodes; | |||
} | |||
} // namespace | |||
std::unique_ptr<CompNode::Event> Profiler::create_event(CompNode comp_node){ | |||
auto event = comp_node.create_event(CompNode::Event::NEED_TIMER); | |||
event->record(); | |||
auto& [begin, time] = m_private->comp_node_begin_map[comp_node]; | |||
if (begin == nullptr) { | |||
begin = event.get(); | |||
time = m_private->timer.get_msecs(); | |||
} | |||
return event; | |||
} | |||
double Profiler::get_host_time_now(){ | |||
return m_private->timer.get_msecs(); | |||
} | |||
double Profiler::get_device_time(CompNode::Event& event) { | |||
auto [base_event, host_time] = | |||
m_private->comp_node_begin_map[event.comp_node()]; | |||
if (base_event == &event) { | |||
return host_time; | |||
} else { | |||
return host_time + base_event->elapsed_time_until(event) * 1000; | |||
} | |||
} | |||
size_t Profiler::get_dump_count(){ | |||
return m_private->dump_count; | |||
} | |||
Profiler::Profiler() { | |||
m_private = std::make_unique<ProfilerPrivate>(); | |||
} | |||
Profiler::Profiler(const std::string& path): Profiler() { | |||
m_private->path = path; | |||
} | |||
} // namespace | |||
void Profiler::enable() { | |||
m_private->enabled = true; | |||
CompNode::sync_all(); | |||
OpTrait::for_each_trait([this](OpTrait& trait) { | |||
auto backup = std::make_unique<ApplyOnPhysicalTensor>( | |||
std::move(trait.apply_on_physical_tensor)); | |||
trait.apply_on_physical_tensor = | |||
[this, backup = backup.get()] ( | |||
const OpDef& def, | |||
const SmallVector<TensorPtr>& inputs){ | |||
size_t index = m_private->op_list.size(); | |||
std::string name = "[" + std::to_string(index) + "]" + print_op(def); | |||
m_private->op_list.push_back({reinterpret_cast<size_t>(&def), name}); | |||
m_private->entry_list.emplace_back(index, OprBegin, get_host_time_now()); | |||
auto&& comp_nodes = collect_comp_nodes(def, inputs); | |||
for (auto&& comp_node : comp_nodes) { | |||
m_private->entry_list.emplace_back(index, OprBegin, create_event(comp_node)); | |||
} | |||
auto output = (*backup)(def, inputs); | |||
for (auto&& comp_node : comp_nodes) { | |||
m_private->entry_list.emplace_back(index, OprEnd, create_event(comp_node)); | |||
} | |||
m_private->entry_list.emplace_back(index, OprEnd, get_host_time_now()); | |||
return output; | |||
}; | |||
m_private->hook_list.push_back({&trait, std::move(backup)}); | |||
void DeviceTimer::reset(thin_function<double()> host_timer) { | |||
CompNode::foreach ([this, host_timer](CompNode device) { | |||
auto base_event = EventPool::with_timer().alloc_shared(device); | |||
base_event->record(); | |||
m_base_event_table[device] = {std::move(base_event), host_timer()}; | |||
}); | |||
} | |||
void Profiler::disable() { | |||
for (auto&& hook : m_private->hook_list) { | |||
std::get<0>(hook)->apply_on_physical_tensor = | |||
std::move(*std::get<1>(hook)); | |||
} | |||
m_private->hook_list.clear(); | |||
m_private->enabled = false; | |||
} | |||
Profiler::~Profiler() { | |||
} | |||
void Profiler::dump(){ | |||
dump(m_private->path); | |||
thin_function<double()> DeviceTimer::get_device_time(CompNode device) { | |||
auto event = EventPool::with_timer().alloc_shared(device); | |||
event->record(); | |||
auto base = m_base_event_table[device]; | |||
return [base, event] { | |||
auto [base_event, host_time] = base; | |||
//TODO: sync once for each compnode | |||
event->host_wait(); | |||
return base_event->elapsed_time_until(*event) * 1000 + host_time; | |||
}; | |||
} | |||
void Profiler::dump(const std::string& path) { | |||
using namespace json; | |||
auto obj = json::Object::make(); | |||
if (!(*obj)["traceEvents"]) { | |||
(*obj)["traceEvents"] = Array::make(); | |||
} | |||
auto& trace_events = (*obj)["traceEvents"]->cast_final<Array>(); | |||
for (auto&& entry : m_private->entry_list) { | |||
auto trace_event_ptr = Object::make(); | |||
auto& trace_event = *trace_event_ptr; | |||
std::string name; | |||
size_t id; | |||
int pid; | |||
std::string tid; | |||
double ts; | |||
const char* ph; | |||
name = m_private->op_list[entry.index].name; | |||
id = entry.index; | |||
pid = getpid(); | |||
if (entry.device) { | |||
entry.device->host_wait(); | |||
ts = get_device_time(*entry.device); | |||
tid = entry.device->comp_node().to_string(); | |||
} else { | |||
ts = entry.host; | |||
tid = "host"; | |||
} | |||
switch (entry.type) { | |||
case OprBegin: { | |||
ph = "B"; | |||
break; | |||
void Profiler::start() { | |||
m_host_timer.reset(); | |||
m_device_timer.reset([&] { return m_host_timer.get_msecs(); }); | |||
OpTrait::for_each_trait([this](OpTrait& trait) { | |||
FunctionHooker hooker{&trait.apply_on_physical_tensor}; | |||
hooker.apply_hook([this](auto&& apply, const OpDef& def, | |||
const SmallVector<TensorPtr>& inputs) { | |||
ProfileEntry entry; | |||
entry.op = def.copy(); | |||
double host_begin = m_host_timer.get_msecs(); | |||
auto&& comp_nodes = collect_comp_nodes(def, inputs); | |||
for (auto&& comp_node : comp_nodes) { | |||
entry.device_list.push_back( | |||
{comp_node, | |||
m_device_timer.get_device_time(comp_node), | |||
{}}); | |||
} | |||
case OprEnd: { | |||
ph = "E"; | |||
break; | |||
auto outputs = apply(def, inputs); | |||
for (auto& [cn, dev_begin, dev_end] : entry.device_list) { | |||
MGB_MARK_USED_VAR(cn); | |||
MGB_MARK_USED_VAR(dev_begin); | |||
dev_end = m_device_timer.get_device_time(cn); | |||
} | |||
} | |||
trace_event["name"] = String::make(name); | |||
trace_event["id"] = Number::make(id); | |||
trace_event["pid"] = Number::make(pid); | |||
trace_event["tid"] = String::make(tid); | |||
trace_event["ts"] = Number::make(ts * 1000); | |||
trace_event["ph"] = String::make(ph); | |||
trace_events.add(std::move(trace_event_ptr)); | |||
} | |||
obj->writeto_fpath(path.empty() ? path : m_private->path); | |||
m_private->dump_count++; | |||
entry.host = {host_begin, m_host_timer.get_msecs()}; | |||
m_profile->push_back(std::move(entry)); | |||
return outputs; | |||
}); | |||
m_hooker_list.push_back(std::move(hooker)); | |||
}); | |||
} | |||
std::string Profiler::print_op(const OpDef& def){ | |||
auto* opr_attr = def.try_cast_final<const OprAttr>(); | |||
if(opr_attr){ | |||
return std::string("OprAttr:") + opr_attr->type; | |||
void Profiler::stop() { | |||
m_hooker_list.clear(); | |||
for (auto& entry : *m_profile) { | |||
entry.wait_device(); | |||
} | |||
return def.dyn_typeinfo()->name; | |||
} | |||
} // namespace imperative | |||
@@ -0,0 +1,55 @@ | |||
/** | |||
* \file imperative/src/include/megbrain/imperative/function_hook.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
*/ | |||
#pragma once | |||
#include "megbrain/utils/thin/function.h" | |||
namespace mgb { | |||
namespace imperative { | |||
template <typename TFunction> | |||
class FunctionHooker; | |||
template <typename TRet, typename... TArgs> | |||
class FunctionHooker<TRet(TArgs...)> { | |||
public: | |||
using FunctionType = thin_function<TRet(TArgs&&...)>; | |||
using HookType = thin_function<TRet(FunctionType, TArgs&&...)>; | |||
explicit FunctionHooker(FunctionType* fptr) : m_fptr{fptr} {} | |||
public: | |||
FunctionHooker& apply_hook(HookType&& hook) { | |||
if (!m_backup) { | |||
FunctionType* backup = new FunctionType(*m_fptr); | |||
std::function<void(FunctionType*)> restorer = | |||
[fptr = m_fptr](FunctionType* bkp) -> void { | |||
*fptr = *bkp; | |||
delete bkp; | |||
}; | |||
m_backup = decltype(m_backup)(backup, restorer); | |||
} | |||
*m_fptr = [func = *m_fptr, hook](TArgs&&... args) -> TRet { | |||
return hook(func, std::forward<TArgs>(args)...); | |||
}; | |||
return *this; | |||
} | |||
private: | |||
FunctionType* m_fptr; | |||
std::unique_ptr<FunctionType, std::function<void(FunctionType*)>> m_backup; | |||
}; | |||
template <typename TRet, typename... TArgs> | |||
FunctionHooker(thin_function<TRet(TArgs...)>* f) | |||
->FunctionHooker<TRet(TArgs...)>; | |||
} // namespace imperative | |||
} // namespace mgb |
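A standalone sketch (not part of the patch) of the hook-and-restore pattern FunctionHooker implements: copy the original callable, install a wrapper that forwards to the hook together with that original, and put the original back afterwards, which the class does automatically when m_backup's custom deleter runs. std::function stands in for thin_function so the snippet compiles on its own; all names are illustrative:

// Hook-and-restore, the same idea as FunctionHooker::apply_hook().
#include <functional>
#include <iostream>

int main() {
    std::function<int(int)> target = [](int x) { return x + 1; };
    auto backup = target;  // keep the original so it can be restored

    // Install a hook that can observe arguments and delegate to the original.
    target = [original = target](int x) -> int {
        std::cout << "called with " << x << "\n";
        return original(x) * 2;
    };
    std::cout << target(3) << "\n";  // "called with 3", then 8

    target = backup;  // restore, as ~FunctionHooker() does through m_backup
    std::cout << target(3) << "\n";  // back to 4
    return 0;
}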
@@ -11,6 +11,8 @@ | |||
#pragma once | |||
#include <variant> | |||
#include "megbrain/comp_node.h" | |||
#include "megbrain/graph/event.h" | |||
#include "megbrain/utils/json.h" | |||
@@ -18,37 +20,59 @@ | |||
#include "megbrain/imperative/op_def.h" | |||
#include "megbrain/imperative/function_hook.h" | |||
namespace mgb { | |||
namespace imperative { | |||
class ProfilerPrivate; | |||
struct ProfileEntry{ | |||
using TimeClosure = std::function<double()>; | |||
std::shared_ptr<OpDef> op; | |||
std::tuple<double, double> host; | |||
std::vector<std::tuple<CompNode, TimeClosure, TimeClosure>> device_list; | |||
void wait_device(){ | |||
for(auto& [cn, begin, end]: device_list){ | |||
MGB_MARK_USED_VAR(cn); | |||
begin = [begin=begin()]{ return begin; }; | |||
end = [end = end()]{ return end; }; | |||
} | |||
} | |||
}; | |||
using Profile = std::vector<ProfileEntry>; | |||
using OpDefPrinter = thin_function<std::string(const OpDef&)>; | |||
class DeviceTimer { | |||
public: | |||
using SharedEvent = std::shared_ptr<CompNode::Event>; | |||
DeviceTimer() = default; | |||
void reset(thin_function<double()> host_timer); | |||
thin_function<double()> get_device_time(CompNode device); | |||
class Profiler { | |||
private: | |||
std::unique_ptr<ProfilerPrivate> m_private; | |||
CompNode::UnorderedMap<std::tuple<SharedEvent, double>> m_base_event_table; | |||
}; | |||
class Profiler { | |||
public: | |||
enum EventKind { OprBegin, OprEnd }; | |||
Profiler(Profile* profile = nullptr) { | |||
if (!profile) { | |||
m_owned_profile = std::make_unique<Profile>(); | |||
profile = m_owned_profile.get(); | |||
} | |||
m_profile = profile; | |||
} | |||
void start(); | |||
void stop(); | |||
Profile& get_profile() { return *m_profile; } | |||
public: | |||
Profiler(); | |||
Profiler(const std::string& path); | |||
~Profiler(); | |||
void enable(); | |||
void disable(); | |||
void dump(); | |||
void dump(const std::string& path); | |||
void record_host(size_t id, std::string name, EventKind type, | |||
double host_time); | |||
void record_device(size_t id, std::string name, EventKind type, | |||
double host_time, CompNode comp_node); | |||
double get_device_time(CompNode::Event& event); | |||
size_t get_dump_count(); | |||
std::unique_ptr<CompNode::Event> create_event(CompNode comp_node); | |||
double get_host_time_now(); | |||
std::string print_op(const OpDef& def); | |||
private: | |||
DeviceTimer m_device_timer; | |||
RealTimer m_host_timer; | |||
Profile* m_profile; | |||
std::unique_ptr<Profile> m_owned_profile; | |||
std::vector<FunctionHooker<decltype(OpDef::apply_on_physical_tensor)>> | |||
m_hooker_list; | |||
}; | |||
} // namespace imperative | |||
} // namespace mgb |
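ProfileEntry::wait_device() above relies on a small idiom: once the device events have completed, each lazy time closure is evaluated once and replaced by a closure that merely returns the cached value, so the entry can later be serialized without touching the device again. A standalone sketch of that closure-freezing idiom (not part of the patch, names illustrative):

// Freeze a lazy closure into its computed value, as in
//   begin = [begin = begin()] { return begin; };
#include <functional>
#include <iostream>

int main() {
    int evaluations = 0;
    std::function<double()> lazy = [&evaluations] { return ++evaluations * 1.5; };

    // Evaluate once, then replace the closure with one returning the result.
    lazy = [value = lazy()] { return value; };

    std::cout << lazy() << " " << lazy() << "\n";  // same value twice: 1.5 1.5
    std::cout << evaluations << "\n";              // evaluated only once: 1
    return 0;
}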
@@ -89,8 +89,8 @@ namespace { | |||
/* ==================== EventPool ==================== */ | |||
CompNode::EventPool::EventPool(CompNode cn): | |||
m_cn{cn} | |||
CompNode::EventPool::EventPool(CompNode cn, size_t flags): | |||
m_cn{cn}, m_flags{flags} | |||
{ | |||
} | |||
@@ -105,7 +105,7 @@ CompNode::Event* CompNode::EventPool::alloc() { | |||
m_free.pop_back(); | |||
return rst; | |||
} | |||
m_allocated.push_back(m_cn.create_event()); | |||
m_allocated.push_back(m_cn.create_event(m_flags)); | |||
return m_allocated.back().get(); | |||
} | |||
@@ -643,9 +643,10 @@ class CompNode::EventPool { | |||
std::vector<std::unique_ptr<CompNode::Event>> m_allocated; | |||
std::vector<CompNode::Event*> m_free; | |||
Spinlock m_lock; | |||
size_t m_flags; | |||
public: | |||
explicit EventPool(CompNode cn); | |||
explicit EventPool(CompNode cn, size_t flags = 0); | |||
~EventPool(); | |||
CompNode::Event* alloc(); | |||