@@ -30,12 +30,6 @@ using namespace imperative;
 using namespace interpreter;
 using namespace interpreter::intl;
 
-#define RECORD_EVENT(type, ...) \
-    if (Profiler::is_profiling()) { \
-        Profiler::record<type>(type{__VA_ARGS__}); \
-    } \
-
-
 namespace {
     auto tinfo_to_tid(SmallVector<TensorInfo*> tinfo) {
        SmallVector<uint64_t> tid;
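Note: the local RECORD_EVENT helper deleted above is superseded at every call site below by MGB_RECORD_EVENT. That macro is not defined anywhere in this diff; a minimal sketch of what such a shared definition presumably looks like (same guarded-record body as the removed macro, name taken from the call sites, exact header location and formatting assumed) is:

    // Assumed shared profiler macro replacing the removed local RECORD_EVENT.
    // Records the event only while the profiler is actively collecting.
    #define MGB_RECORD_EVENT(type, ...)                   \
        if (Profiler::is_profiling()) {                   \
            Profiler::record<type>(type{__VA_ARGS__});    \
        }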
@@ -70,12 +64,12 @@ namespace mgb {
  **/
 SYMBOL_EXPORT
 void imperative_log_profile_begin(const char* message) {
-    RECORD_EVENT(CustomEvent, std::string{message});
+    MGB_RECORD_EVENT(CustomEvent, std::string{message});
 }
 
 SYMBOL_EXPORT
 void imperative_log_profile_end(const char* message) {
-    RECORD_EVENT(CustomFinishEvent, std::string{message});
+    MGB_RECORD_EVENT(CustomFinishEvent, std::string{message});
 }
 
 SYMBOL_EXPORT
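The two SYMBOL_EXPORT helpers above bracket a custom profiler region with a CustomEvent/CustomFinishEvent pair. A hypothetical caller (the wrapper function and workload below are illustrative only, not part of this change) would pair them like this:

    // Illustrative only: brackets an arbitrary workload with the exported helpers.
    void run_with_profile_marker() {
        imperative_log_profile_begin("my_stage");   // records CustomEvent{"my_stage"}
        do_my_workload();                           // assumed user code
        imperative_log_profile_end("my_stage");     // records CustomFinishEvent{"my_stage"}
    }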
@@ -159,13 +153,13 @@ TensorInfo* ChannelImpl::put_impl(const DeviceTensorND& data, const HostTensorND
     auto& state = get_channel_state();
     auto _ = StackManager::Guard{"Put", &state.stack_manager};
     auto info = alloc();
-    RECORD_EVENT(TensorCommandEvent, info->id, TensorCommandKind::Put);
+    MGB_RECORD_EVENT(TensorCommandEvent, info->id, TensorCommandKind::Put);
     init(info, {data.layout(), data.comp_node()});
     info->mem_desc.id = StorageIdentifier::make(++m_storage_id);
     info->ptr = Tensor::make(data, hvalue);
-    RECORD_EVENT(TensorProduceEvent, info->id, info->desc.layout, info->desc.comp_node, data.raw_ptr());
+    MGB_RECORD_EVENT(TensorProduceEvent, info->id, info->desc.layout, info->desc.comp_node, data.raw_ptr());
     info->status = TensorInfo::Produced;
-    RECORD_EVENT(TensorCommandFinishEvent, info->id, TensorCommandKind::Put);
+    MGB_RECORD_EVENT(TensorCommandFinishEvent, info->id, TensorCommandKind::Put);
     return info;
 }
 
@@ -231,7 +225,7 @@ void ChannelImpl::dispatch_default_cpu(
     auto _ = StackManager::Guard(name, &state.stack_manager);
 
     auto [output_descs, validated] = OpDef::infer_output_attrs_fallible(*op, input_descs);
-    RECORD_EVENT(ShapeInferEvent, validated);
+    MGB_RECORD_EVENT(ShapeInferEvent, validated);
 
     SmallVector<DeviceTensorND> input_tensornds;
     input_tensornds.reserve(input_descs.size());
@@ -289,7 +283,7 @@ void ChannelImpl::dispatch_default_cpu(
         }
         return op_info;
     };
-    RECORD_EVENT(OpDispatchEvent, op_id, op->trait()->name, op_info_getter,
+    MGB_RECORD_EVENT(OpDispatchEvent, op_id, name, op_info_getter,
             tinfo_to_tid(input_infos), tinfo_to_tid(output_infos),
             state.stack_manager.dump());
 }
@@ -306,7 +300,7 @@ void ChannelImpl::dispatch_kernel(
     auto _ = StackManager::Guard{name, &state.stack_manager};
 
     auto [output_descs, validated] = OpDef::infer_output_attrs_fallible(*op, input_descs);
-    RECORD_EVENT(ShapeInferEvent, validated);
+    MGB_RECORD_EVENT(ShapeInferEvent, validated);
 
     ApplyOp cmd{Profiler::next_id(), std::move(op)};
     cmd.inputs = std::move(input_infos);
@@ -332,7 +326,7 @@ void ChannelImpl::dispatch_kernel(
         }
         return op_info;
     };
-    RECORD_EVENT(OpDispatchEvent, cmd.id, cmd.op->trait()->name, op_info_getter,
+    MGB_RECORD_EVENT(OpDispatchEvent, cmd.id, name, op_info_getter,
             tinfo_to_tid(cmd.inputs), tinfo_to_tid(cmd.outputs),
             state.stack_manager.dump());
     m_buffer.enqueue(std::move(cmd));
@@ -426,7 +420,7 @@ DType ChannelImpl::get_dtype(Handle handle) {
     mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(),
             "invalid handle: %p", handle);
     auto info = reinterpret_cast<TensorInfo*>(handle);
-    RECORD_EVENT(TensorGetPropEvent, info->id, TensorProp::DType);
+    MGB_RECORD_EVENT(TensorGetPropEvent, info->id, TensorProp::DType);
     auto ret = info->desc.layout.dtype;
     mgb_assert(ret.valid());
     return ret;
@@ -438,7 +432,7 @@ CompNode ChannelImpl::get_device(Handle handle) {
     mgb_assert(m_valid_handle.find(handle) != m_valid_handle.end(),
             "invalid handle: %p", handle);
     auto info = reinterpret_cast<TensorInfo*>(handle);
-    RECORD_EVENT(TensorGetPropEvent, info->id, TensorProp::Device);
+    MGB_RECORD_EVENT(TensorGetPropEvent, info->id, TensorProp::Device);
     auto ret = info->desc.comp_node;
     mgb_assert(ret.valid());
     return ret;
@@ -512,7 +506,7 @@ TensorInfo* ChannelImpl::alloc() {
 
 void ChannelImpl::init(TensorInfo* info, LogicalTensorDesc desc) {
     m_valid_handle.insert(info);
-    RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
+    MGB_RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
     info->status = TensorInfo::Allocated;
     info->desc = std::move(desc);
     info->mem_desc.layout = info->desc.layout;
@@ -554,7 +548,7 @@ void ChannelImpl::free(TensorInfo* ptr) {
 }
 
 void ChannelImpl::recursive_free(TensorInfo* ptr) {
-    RECORD_EVENT(TensorCommandEvent, ptr->id, TensorCommandKind::RecFree);
+    MGB_RECORD_EVENT(TensorCommandEvent, ptr->id, TensorCommandKind::RecFree);
     SmallVector<TensorInfo*> inps;
     if (ptr->producer) {
         for (auto i : ptr->producer->inputs) {
@@ -569,7 +563,7 @@ void ChannelImpl::recursive_free(TensorInfo* ptr) {
             recursive_free(i);
         }
     }
-    RECORD_EVENT(TensorCommandFinishEvent, ptr->id, TensorCommandKind::RecFree);
+    MGB_RECORD_EVENT(TensorCommandFinishEvent, ptr->id, TensorCommandKind::RecFree);
 }
 
 void ChannelImpl::real_free(TensorInfo* ptr) {
@@ -581,9 +575,9 @@ void ChannelImpl::real_free(TensorInfo* ptr) {
     ptr->detach_producer();
     bool has_value = ptr->ptr != nullptr;
     if (has_value) {
-        RECORD_EVENT(TensorReleaseEvent, ptr->id);
+        MGB_RECORD_EVENT(TensorReleaseEvent, ptr->id);
     }
-    RECORD_EVENT(TensorEraseEvent, ptr->id, ptr->ptr_use_count);
+    MGB_RECORD_EVENT(TensorEraseEvent, ptr->id, ptr->ptr_use_count);
     ptr->status = TensorInfo::Deleted;
     MGB_LOCK_GUARD(m_mutex);
     m_pool.free(ptr);
@@ -599,7 +593,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) {
     auto& state = get_worker_state();
     MGB_LOCK_GUARD(m_mutex);
     m_dtr.update_used_time(dest);
-    RECORD_EVENT(TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), ptr->dev_tensor().raw_ptr());
+    MGB_RECORD_EVENT(TensorProduceEvent, dest->id, ptr->layout(), ptr->comp_node(), ptr->dev_tensor().raw_ptr());
     // update tensor desc for static infer
     dest->desc.layout = ptr->layout();
     dest->desc.comp_node = ptr->comp_node();
@@ -614,7 +608,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr) {
 }
 
 void ChannelImpl::release_tensor(TensorInfo* dest) {
-    RECORD_EVENT(TensorReleaseEvent, dest->id);
+    MGB_RECORD_EVENT(TensorReleaseEvent, dest->id);
     MGB_LOCK_GUARD(m_mutex);
     dest->ptr.reset();
 }
@@ -625,9 +619,9 @@ void ChannelImpl::regenerate(TensorInfo* dest) {
         m_apply_stack.push({ApplyOp{path->id, path->op, path->inputs, path->outputs, {}}, 0, dest});
         if (!m_applying) flush_apply_stack();
     } else if (dest->evict_type == EvictType::SWAP) {
-        RECORD_EVENT(TensorCommandEvent, dest->id, TensorCommandKind::ReGen);
+        MGB_RECORD_EVENT(TensorCommandEvent, dest->id, TensorCommandKind::ReGen);
         produce_tensor(dest, Tensor::make(dest->h_value));
-        RECORD_EVENT(TensorCommandFinishEvent, dest->id, TensorCommandKind::ReGen);
+        MGB_RECORD_EVENT(TensorCommandFinishEvent, dest->id, TensorCommandKind::ReGen);
     }
 }
 
@@ -695,7 +689,7 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd) {
         }
         return outputs;
     };
-    RECORD_EVENT(OpExecuteEvent, apply_id);
+    MGB_RECORD_EVENT(OpExecuteEvent, apply_id);
     // Begin profiling operator
     SmallVector<std::pair<CompNode, uint64_t>> kernels;
     if (profiling_device) {
@@ -710,9 +704,9 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd) {
     }
     for (auto* input: cmd.inputs) {
         auto input_id = input->id;
-        RECORD_EVENT(OpInputEvent, input_id);
-        RECORD_EVENT(TensorUsageEvent, input_id);
-        RECORD_EVENT(OpInputFinishEvent, input_id);
+        MGB_RECORD_EVENT(OpInputEvent, input_id);
+        MGB_RECORD_EVENT(TensorUsageEvent, input_id);
+        MGB_RECORD_EVENT(OpInputFinishEvent, input_id);
     }
     // Fused by command buffer. @see: CommandBuffer::fuse_del
     // Now if dest is inplacable, it's refcnt would be decreased to 1 and owned by tensor_inputs after Del.
@@ -721,40 +715,40 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd) {
         // refcnt --, owners: [tensor_inputs]
         // if it's decreased to 1, would be detected at @see: proxy_graph_detail::apply_on_physical_tensor
         uint64_t del_id = del->id;
-        RECORD_EVENT(TensorCommandEvent, del_id, TensorCommandKind::Del);
+        MGB_RECORD_EVENT(TensorCommandEvent, del_id, TensorCommandKind::Del);
         free(del);
-        RECORD_EVENT(TensorCommandFinishEvent, del_id, TensorCommandKind::Del);
+        MGB_RECORD_EVENT(TensorCommandFinishEvent, del_id, TensorCommandKind::Del);
     }
     // Before wait
     //TODO: split operator wait and execute so that OpWait could be corrected recorded.
     // Before execute
     for (auto&& [device, kernel_id]: kernels) {
-        RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device);
-        RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
+        MGB_RECORD_EVENT(KernelLaunchEvent, apply_id, kernel_id, device);
+        MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
     }
     // Apply op
     // Here std::move is REQUIRED for removing duplicated references.
     auto outputs = apply_on_physical_tensor(apply_on_physical_tensor, *cmd.op, inputs);
     // After execute
     for (auto&& [device, kernel_id]: kernels) {
-        RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
-        RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device);
+        MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
+        MGB_RECORD_EVENT(KernelLaunchFinishEvent, apply_id, kernel_id, device);
     }
     // End profiling operator
     mgb_assert(outputs.size() == cmd.outputs.size());
     for (size_t i = 0; i < outputs.size(); ++i) {
         auto output = cmd.outputs[i];
         if (output == nullptr) {
-            RECORD_EVENT(OpOutputEvent, 0);
-            RECORD_EVENT(OpOutputFinishEvent, 0);
+            MGB_RECORD_EVENT(OpOutputEvent, 0);
+            MGB_RECORD_EVENT(OpOutputFinishEvent, 0);
         } else if (output->ptr != nullptr) {
-            RECORD_EVENT(OpOutputEvent, output->id);
-            RECORD_EVENT(OpOutputFinishEvent, output->id);
+            MGB_RECORD_EVENT(OpOutputEvent, output->id);
+            MGB_RECORD_EVENT(OpOutputFinishEvent, output->id);
         } else {
-            RECORD_EVENT(OpOutputEvent, output->id);
+            MGB_RECORD_EVENT(OpOutputEvent, output->id);
             produce_tensor(output, outputs[i].tensor);
             output->mem_desc = outputs[i].desc;
-            RECORD_EVENT(OpOutputFinishEvent, output->id);
+            MGB_RECORD_EVENT(OpOutputFinishEvent, output->id);
             sample_on_device(output->desc.comp_node, false);
         }
     }
@@ -775,7 +769,7 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd) {
         }
         m_dtr.unpin(cmd.inputs);
     }
-    RECORD_EVENT(OpExecuteFinishEvent, apply_id);
+    MGB_RECORD_EVENT(OpExecuteFinishEvent, apply_id);
     // End profiling operator
 }
 
@@ -789,7 +783,7 @@ void ChannelImpl::flush_apply_stack() {
                 m_dtr.pin(cmd.inputs);
             }
             if (recomp) {
-                RECORD_EVENT(TensorCommandEvent, recomp->id, TensorCommandKind::ReGen);
+                MGB_RECORD_EVENT(TensorCommandEvent, recomp->id, TensorCommandKind::ReGen);
             }
         }
         bool regen = false;
@@ -812,7 +806,7 @@ void ChannelImpl::flush_apply_stack() {
             m_apply_stack.pop();
             do_apply_op(cmd_backup);
             if (recomp_backup) {
-                RECORD_EVENT(TensorCommandFinishEvent, recomp_backup->id, TensorCommandKind::ReGen);
+                MGB_RECORD_EVENT(TensorCommandFinishEvent, recomp_backup->id, TensorCommandKind::ReGen);
                 for (auto o : cmd_backup.outputs) {
                     if (o) {
                         m_dtr.update_dsu_after_recompute(o);
@@ -831,7 +825,7 @@ bool ChannelImpl::auto_evict(size_t force_num) {
     size_t current_memory = m_dtr.comp_node.get_used_memory();
     size_t flag = false;
     while ((state.options.dtr_eviction_threshold > 0 && current_memory > state.options.dtr_eviction_threshold) || force_num > 0) {
-        RECORD_EVENT(AutoEvictEvent);
+        MGB_RECORD_EVENT(AutoEvictEvent);
         sample_on_device(m_dtr.comp_node, false);
         auto best = m_dtr.find_best_tensor(state.options.enable_dtr_sqrt_sampling && !force_num);
         if (!best) {
@@ -849,7 +843,7 @@ bool ChannelImpl::auto_evict(size_t force_num) {
             m_dtr.update_dsu_after_evict(best);
         }
         sample_on_device(m_dtr.comp_node, false);
-        RECORD_EVENT(AutoEvictFinishEvent);
+        MGB_RECORD_EVENT(AutoEvictFinishEvent);
     }
     return flag;
 }
@@ -888,7 +882,7 @@ TensorPtr ChannelImpl::wait_tensor(TensorInfo* info, TensorProp prop) {
     mgb_assert(!m_waitee, "duplicate waitee");
     m_waitee = info;
     m_waitee_id = Profiler::next_id();
-    RECORD_EVENT(TensorWaitPropEvent, info->id, m_waitee_id, prop);
+    MGB_RECORD_EVENT(TensorWaitPropEvent, info->id, m_waitee_id, prop);
     bool require_host = prop == TensorProp::HostValue;
     auto host_available = [&]{
         return info->ptr && info->ptr->value_fetched();
@@ -904,14 +898,14 @@ TensorPtr ChannelImpl::wait_tensor(TensorInfo* info, TensorProp prop) {
         check_worker_exc_unsafe();
         return require_host ? host_available() : static_cast<bool>(info->ptr);
     });
-    RECORD_EVENT(TensorWaitPropFinishEvent, info->id, m_waitee_id, prop);
+    MGB_RECORD_EVENT(TensorWaitPropFinishEvent, info->id, m_waitee_id, prop);
     m_waitee = nullptr;
     return info->ptr;
 }
 
 void ChannelImpl::notify_tensor_unsafe(TensorInfo* info) {
     if (info == m_waitee) {
-        RECORD_EVENT(TensorNotifyPropEvent, info->id);
+        MGB_RECORD_EVENT(TensorNotifyPropEvent, info->id);
         m_cv.notify_all();
     }
 }
@@ -1014,12 +1008,12 @@ void ChannelImpl::process_one_task(Command& icmd) {
     auto cmd_visitor = [&](const auto& cmd) {
         using T = std::decay_t<decltype(cmd)>;
         if constexpr (std::is_same_v<T, Put>) {
-            RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Put);
-            RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node()));
+            MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Put);
+            MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node()));
             auto value = cmd.no_cache ? std::make_shared<Tensor>(cmd.value) : Tensor::make(cmd.value);
-            RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node()));
+            MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(cmd.value.comp_node()));
             produce_tensor(cmd.dest, std::move(value));
-            RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::Put);
+            MGB_RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::Put);
             sample_on_device(cmd.dest->desc.comp_node, false);
         } else if constexpr (std::is_same_v<T, ApplyOp>) {
             for (auto& i : cmd.inputs) {
@@ -1088,11 +1082,11 @@ void ChannelImpl::process_one_task(Command& icmd) {
                 }
             }
         } else if constexpr (std::is_same_v<T, Del>) {
-            RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Del);
+            MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Del);
             CompNode device = cmd.dest->desc.comp_node;
             uint64_t tensor_id = cmd.dest->id;
             free(cmd.dest);
-            RECORD_EVENT(TensorCommandFinishEvent, tensor_id, TensorCommandKind::Del);
+            MGB_RECORD_EVENT(TensorCommandFinishEvent, tensor_id, TensorCommandKind::Del);
             sample_on_device(device, false);
         } else if constexpr (std::is_same_v<T, GetValue>) {
             if (cmd.dest->invalid) return;
@@ -1106,64 +1100,64 @@ void ChannelImpl::process_one_task(Command& icmd) {
             imperative_log_profile_end("GetValue");
         } else if constexpr (std::is_same_v<T, SwapIn>) {
             if (cmd.dest->invalid) return;
-            RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapIn);
+            MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapIn);
             produce_tensor(cmd.dest, Tensor::make(cmd.dest->h_value));
-            RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapIn);
+            MGB_RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapIn);
             sample_on_device(cmd.dest->desc.comp_node, false);
         } else if constexpr (std::is_same_v<T, SwapOut>) {
             if (cmd.dest->invalid) return;
-            RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapOut);
+            MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::SwapOut);
             cmd.dest->h_value = cmd.dest->ptr->get_value();
             if (cmd.dest->evict_type == EvictType::NONE) {
                 cmd.dest->evict_type = EvictType::SWAP;
                 cmd.dest->status = TensorInfo::Swapped;
                 release_tensor(cmd.dest);
            }
-            RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapOut);
+            MGB_RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::SwapOut);
             sample_on_device(cmd.dest->desc.comp_node, false);
         } else if constexpr (std::is_same_v<T, Drop>) {
             if (cmd.dest->invalid) return;
-            RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Drop);
+            MGB_RECORD_EVENT(TensorCommandEvent, cmd.dest->id, TensorCommandKind::Drop);
             do_drop(cmd.dest, true);
-            RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::Drop);
+            MGB_RECORD_EVENT(TensorCommandFinishEvent, cmd.dest->id, TensorCommandKind::Drop);
         } else if constexpr (std::is_same_v<T, SetOption>) {
             options.set_option(cmd.key, cmd.value);
         } else if constexpr (std::is_same_v<T, StartProfile>) {
-            RECORD_EVENT(StartProfileEvent);
+            MGB_RECORD_EVENT(StartProfileEvent);
             CompNode::sync_all();
             for (auto* info: cmd.capture_tensors) {
-                RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
+                MGB_RECORD_EVENT(TensorDeclareEvent, info->id, info->name);
                 if (info->status == TensorInfo::Produced) {
                     // TODO: handle swap/drop
-                    RECORD_EVENT(TensorProduceEvent, info->id, info->desc.layout, info->desc.comp_node, info->ptr->dev_tensor().raw_ptr());
+                    MGB_RECORD_EVENT(TensorProduceEvent, info->id, info->desc.layout, info->desc.comp_node, info->ptr->dev_tensor().raw_ptr());
                 }
             }
             CompNode::foreach([&](CompNode device){
                 if (Profiler::get_option("sample_rate", 0)) {
                     sample_on_device(device, true);
                 }
-                RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
+                MGB_RECORD_EVENT(RecordDeviceEvent, Timer::record_device(device));
             });
-            RECORD_EVENT(StartProfileFinishEvent);
+            MGB_RECORD_EVENT(StartProfileFinishEvent);
         } else if constexpr (std::is_same_v<T, StopProfile>) {
-            RECORD_EVENT(StopProfileEvent);
+            MGB_RECORD_EVENT(StopProfileEvent);
             for (auto* info: cmd.escape_tensors) {
                 bool has_value = info->status == TensorInfo::Produced;
                 if (has_value) {
-                    RECORD_EVENT(TensorReleaseEvent, info->id);
+                    MGB_RECORD_EVENT(TensorReleaseEvent, info->id);
                 }
-                RECORD_EVENT(TensorEraseEvent, info->id);
+                MGB_RECORD_EVENT(TensorEraseEvent, info->id);
             }
             CompNode::foreach([&](CompNode device){
                 if (Profiler::get_option("sample_rate", 0)) {
                     sample_on_device(device, true);
                 }
             });
-            RECORD_EVENT(StopProfileFinishEvent);
+            MGB_RECORD_EVENT(StopProfileFinishEvent);
         } else if constexpr (std::is_same_v<T, PushScope>) {
-            RECORD_EVENT(ScopeEvent, cmd.scope_name);
+            MGB_RECORD_EVENT(ScopeEvent, cmd.scope_name);
         } else if constexpr (std::is_same_v<T, PopScope>) {
-            RECORD_EVENT(ScopeFinishEvent, cmd.scope_name);
+            MGB_RECORD_EVENT(ScopeFinishEvent, cmd.scope_name);
         } else {
             static_assert(!std::is_same_v<T, T>);
         }
@@ -1186,7 +1180,7 @@ void ChannelImpl::process_one_task(Command& icmd) {
             cmd.dest->invalid = true;
         }
         m_worker_exc = std::current_exception();
-        RECORD_EVENT(WorkerExceptionEvent);
+        MGB_RECORD_EVENT(WorkerExceptionEvent);
         if (m_waitee) {
             notify_tensor_unsafe(m_waitee);
         }
@@ -1347,7 +1341,7 @@ void ChannelImpl::push_scope(std::string name) {
     mgb_assert(check_available(), "Channel already closed");
     auto& state = get_channel_state();
     state.stack_manager.enter(name);
-    RECORD_EVENT(ScopeEvent, name);
+    MGB_RECORD_EVENT(ScopeEvent, name);
     m_buffer.enqueue(PushScope{name});
 }
 
@@ -1356,7 +1350,7 @@ void ChannelImpl::pop_scope(std::string name) {
     mgb_assert(check_available(), "Channel already closed");
     auto& state = get_channel_state();
     state.stack_manager.exit(name);
-    RECORD_EVENT(ScopeFinishEvent, name);
+    MGB_RECORD_EVENT(ScopeFinishEvent, name);
     m_buffer.enqueue(PopScope{name});
 }
 
@@ -1376,9 +1370,9 @@ void ChannelImpl::sample_on_device(CompNode device, bool force) {
             return;
         }
     }
-    RECORD_EVENT(SampleDeviceEvent, device);
+    MGB_RECORD_EVENT(SampleDeviceEvent, device);
     auto [total, free] = device.get_mem_status_bytes();
-    RECORD_EVENT(SampleDeviceFinishEvent, device, total, free);
+    MGB_RECORD_EVENT(SampleDeviceFinishEvent, device, total, free);
 }
 
 void ChannelImpl::DynamicSublinear::pin(const SmallVector<TensorInfo*>& vec) {