refactor(profiler): add state structs to replay recorded events

GitOrigin-RevId: 16a1f5d7ba
4 years ago · 5cb35c1bed
--- a/imperative/src/impl/profiler/states.h
+++ b/imperative/src/impl/profiler/states.h
@@ -0,0 +1,153 @@
 #pragma once

 #include <set>
 #include <any>
 #include <typeindex>

 #include "megbrain/tensor.h"

 namespace mgb::imperative::profiler {

 // Per-device record stored in ProfileState::devices.
 //
 // The scalar members carry default initializers so that a default-initialized
 // instance never holds indeterminate values; the struct is still an aggregate.
 struct ProfileDeviceState {
    // Assigned by ProfileState::operator[](CompNode) when the entry is created.
    int64_t index = 0;
    // The computing node this record describes.
    CompNode device;
    // Device event associated with base_time.
    // NOTE(review): presumably the reference event that device timestamps are
    // measured against -- confirm with the code that records it.
    std::shared_ptr<CompNode::Event> base_event;
    uint64_t base_time = 0;  // in ns
 };

 // Empty placeholder: it has no members yet. ProfileState holds one instance
 // of it as its `worker` member.
 struct ProfileWorkerState {};

 // Per-tensor record stored in ProfileState::tensors.
 struct ProfileTensorState {
    // Tensor identifier. Default-initialized to 0 so that a default-initialized
    // record never carries an indeterminate id (the struct stays an aggregate).
    uint64_t id = 0;
    // Layout (shape and dtype) used by size_in_bytes().
    TensorLayout layout;
    // Computing node the tensor is associated with.
    CompNode device;
    std::string name;
    // NOTE(review): presumably the time at which the tensor was produced;
    // the unit is not established in this header -- confirm.
    uint64_t produced = 0;
    // NOTE(review): presumably how long the tensor stayed alive; unit and
    // update site are not visible here -- confirm.
    uint64_t living_time = 0;

    // Returns 0 while the layout has no valid dtype; otherwise returns the
    // dtype's size for layout.total_nr_elems() elements.
    size_t size_in_bytes() const {
        return layout.dtype.valid()
                ? layout.dtype.size(layout.total_nr_elems())
                : 0;
    }
 };

 // Plain bundle of event counters; every counter starts at zero.
 // The member order is part of the aggregate's interface and must not change.
 struct ProfileStaticsState {
    // Operator counters.
    size_t op_enqueue_count{0};
    size_t op_execute_count{0};
    // Wait counters (value / shape).
    size_t wait_value_count{0};
    size_t wait_shape_count{0};
    size_t exception_count{0};
    // Shape-inference outcome counters.
    size_t infer_shape_valid_count{0};
    size_t infer_shape_invalid_count{0};
    // Tensor lifetime counters.
    size_t alive_tensor_count{0};
    size_t produce_tensor_count{0};
    size_t erase_tensor_count{0};
    size_t wait_prop_count{0};
    size_t redundant_tensor_count{0};
 };

 // Per-operator record stored in ProfileState::operators.
 //
 // Scalar members are default-initialized to 0 so that a default-initialized
 // record never exposes indeterminate values; the struct stays an aggregate.
 struct ProfileOperatorState {
    uint64_t id = 0;
    std::string name;
    // Ids of the operator's inputs and outputs.
    // NOTE(review): presumably tensor ids matching ProfileTensorState::id -- confirm.
    SmallVector<uint64_t> inputs;
    SmallVector<uint64_t> outputs;
    CompNode device;

    // Host-side begin/end values.
    // NOTE(review): unit (likely a timestamp) is not established here -- confirm.
    uint64_t host_begin = 0;
    uint64_t host_end = 0;
    // Device-side begin/end events; null until assigned.
    std::shared_ptr<CompNode::Event> device_begin;
    std::shared_ptr<CompNode::Event> device_end;
 };

 // Per-thread record stored in ProfileState::threads.
 struct ProfileThreadState {
    // Id of the thread this record belongs to; a default-constructed id until
    // ProfileState::operator[](std::thread::id) fills it in.
    std::thread::id tid;
    // Assigned by ProfileState::operator[](std::thread::id) on creation.
    // Default-initialized to 0 so it is never indeterminate.
    int64_t index = 0;
    // Stack of scope names; ProfileState::concat_scope joins them with "::".
    std::vector<std::string> scope_stack;
 };

 // Couples a tensor id with one property value of type TProp.
 //
 // Ordering compares `value` first and falls back to `id` only when the two
 // values are equal, so an ordered container of pairs is sorted by the
 // property with ids breaking ties.
 template <typename TProp>
 struct ProfileTensorPropPair {
    uint64_t id;
    TProp value;

    // Strict "less than": by value, then by id.
    bool operator<(const ProfileTensorPropPair& rhs) const {
        if (value == rhs.value) {
            return id < rhs.id;
        }
        return value < rhs.value;
    }

    // Two pairs are equal only when both id and value match.
    bool operator==(const ProfileTensorPropPair& rhs) const {
        if (!(id == rhs.id)) {
            return false;
        }
        return value == rhs.value;
    }

    // Strict "greater than": by value, then by id.
    bool operator>(const ProfileTensorPropPair& rhs) const {
        if (value == rhs.value) {
            return id > rhs.id;
        }
        return value > rhs.value;
    }
 };

 // (tensor id, size_t value) pair; element type of ProfileState::tensors_by_size
 // and ProfileState::tensors_by_produced.
 using ProfileTensorSizePair = ProfileTensorPropPair<size_t>;
 // (tensor id, uint64_t value) pair.
 // NOTE(review): no use of this alias is visible in this header;
 // ProfileState::tensors_by_produced is declared with ProfileTensorSizePair.
 using ProfileTensorProducedPair = ProfileTensorPropPair<uint64_t>;

 // Pairs a tensor id with a std::type_index.
 // std::type_index has no default constructor, so instances must be created
 // with both members supplied (e.g. aggregate initialization).
 struct GeneralTensorEvent {
    // Id of the tensor the event refers to.
    uint64_t tensor_id;
    // NOTE(review): presumably the typeid of the concrete event type -- confirm
    // against the code that constructs these.
    std::type_index type;
 };

 struct ProfileState {
    std::unordered_map<uint64_t, ProfileTensorState> tensors;
    std::unordered_map<uint64_t, ProfileOperatorState> operators;
    std::unordered_map<std::string, uint64_t> tensor_name_counter;
    std::set<ProfileTensorSizePair> tensors_by_size;
    std::set<ProfileTensorSizePair> tensors_by_produced;
    ProfileWorkerState worker;
    ProfileStaticsState statics;
    std::unordered_map<std::thread::id, ProfileThreadState> threads;
    CompNode::UnorderedMap<ProfileDeviceState> devices;

    ProfileThreadState& operator[](std::thread::id tid) {
        if (threads.count(tid) == 0) {
            threads[tid].tid = tid;
            threads[tid].index = threads.size();
        }
        return threads[tid];
    }

    ProfileDeviceState& operator[](CompNode device) {
        if (devices.count(device) == 0) {
            devices[device].device = device;
            devices[device].index = devices.size();
        }
        return devices[device];
    }

    std::vector<uint64_t> top_k_tensor_in_device(CompNode device, size_t k) {
        std::vector<uint64_t> results;
        for (auto iter = tensors_by_size.rbegin(); iter != tensors_by_size.rend(); ++iter) {
            if (!k) {
                break;
            }
            if (tensors[iter->id].device == device) {
                results.push_back(iter->id);
                --k;
            }
        }
        return results;
    }

    std::string concat_scope(std::thread::id tid) {
        auto& scope_stack = threads[tid].scope_stack;
        if (scope_stack.empty()) {
            return {};
        }
        std::string result = scope_stack[0];
        for (size_t i = 1; i < scope_stack.size(); ++i) {
            result += "::";
            result += scope_stack[i];
        }
        return result;
    }
 };

 }