feat(mgb): add tensorboard tool c++ layer interface and python parsing

GitOrigin-RevId: ae98bc887f
3 years ago · 11a383bc79
--- a/imperative/python/megengine/tools/README.md
+++ b/imperative/python/megengine/tools/README.md
@@ -5,4 +5,4 @@ Use these files in the following way (replace `xxx` to specific file name, like

 ```
 python -m megengine.tools.xxx
 ```
 ```
--- a/imperative/python/megengine/tools/graph_info_analyze.py
+++ b/imperative/python/megengine/tools/graph_info_analyze.py
@@ -0,0 +1,235 @@
 #! /usr/bin/env python3
 # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 #
 # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 import argparse
 import json
 import math
 import os

 from megengine.utils.module_stats import sizeof_fmt
 from megengine.utils.tensorboard import SummaryWriterExtend


 def load_single_graph(fpath):
    with open(fpath) as fin:
        data = json.load(fin)

        for t in ["operator", "var"]:
            data[t] = {int(i): j for i, j in data[t].items()}

        gvars = data["var"]
        for oid, i in data["operator"].items():
            i["input"] = list(map(int, i["input"]))
            out = i["output"] = list(map(int, i["output"]))
            for j in out:
                gvars[j]["owner_opr"] = oid

        for var in data["var"].values():
            mp = var.get("mem_plan", None)
            if mp:
                var["shape"] = "{" + ",".join(map(str, mp["layout"]["shape"])) + "}"
            else:
                var["shape"] = "<?>"

    return data


 def comp_graph_plotter(input, writer):
    jgraph = load_single_graph(input)
    all_oprs = jgraph["operator"]
    all_vars = jgraph["var"]
    for i in all_oprs:
        opr = all_oprs[i]
        if opr["type"] == "ImmutableTensor":
            continue
        inputlist = []
        for var in opr["input"]:
            inpopr = all_oprs[all_vars[var]["owner_opr"]]
            if inpopr["type"] == "ImmutableTensor":
                continue
            inputlist.append(all_oprs[all_vars[var]["owner_opr"]]["name"])
        writer.add_node_raw(opr["name"], opr["type"], inputlist)
    writer.add_graph_by_node_raw_list()


 def load_mem_info(fpath):
    with open(fpath) as fin:
        data = json.load(fin)

        oprs = data["opr"]
        for oid, i in oprs.items():
            i["size"] = 0

        for oid, i in data["chunk"].items():
            i["size"] = int(i["logic_addr_end"]) - int(i["logic_addr_begin"])

        data["peak_memory"] = 0
        data["weight_memory"] = 0
        for oid, i in data["chunk"].items():
            if i["type"] == "static_mem":
                i["owner_opr"] = oprs[i["time_begin"]]["name"]
                life_begin = int(i["time_begin"])
                life_end = int(i["time_end"])
                if i["overwrite_dest_id"] != "-1":
                    life_begin = life_begin + 1
                if data["peak_memory"] < int(i["logic_addr_end"]):
                    data["peak_memory"] = int(i["logic_addr_end"])
                for j in range(life_begin, life_end):
                    oprs[str(j)]["size"] = oprs[str(j)]["size"] + i["size"]
            elif i["type"] == "weight_mem":
                data["weight_memory"] += int(i["logic_addr_end"]) - int(
                    i["logic_addr_begin"]
                )
    return data


 def peak_mem_regist(input, writer):
    jmem = load_mem_info(input)
    writer.add_text(
        "PEAK_MEMORY_SIZE",
        [sizeof_fmt(jmem["peak_memory"]) + "(" + str(jmem["peak_memory"]) + " B)"],
    )
    writer.add_text(
        "WEIGHT_MEMORY_SIZE",
        [sizeof_fmt(jmem["weight_memory"]) + "(" + str(jmem["weight_memory"]) + " B)"],
    )

    all_oprs = jmem["opr"]
    all_chunks = jmem["chunk"]

    max_size = 0
    max_size_oprs = []
    # get oprs that reach the max memory
    for oid, i in all_oprs.items():
        if i["size"] == max_size:
            max_size_oprs.append(int(i["id"]))
        elif i["size"] > max_size:
            max_size = i["size"]
            max_size_oprs.clear()
            max_size_oprs.append(int(i["id"]))
    # get component of chunks
    max_size_oprs.sort()
    opr2chunks = []
    num = len(max_size_oprs)
    for i in range(num):
        opr2chunks.append([])
    for oid, i in all_chunks.items():
        if i["type"] == "static_mem":
            life_begin = int(i["time_begin"])
            life_end = int(i["time_end"])
            if i["overwrite_dest_id"] != "-1":
                life_begin = life_begin + 1
            if max_size_oprs[0] >= life_end or max_size_oprs[-1] < life_begin:
                continue
            for j in range(num):
                if max_size_oprs[j] >= life_end:
                    break
                elif max_size_oprs[j] >= life_begin:
                    opr2chunks[j].append(i["id"])

    peak_num = 0
    for i in range(num):
        suffix_1 = "PEAK" + str(peak_num)
        if i - 1 > 0 and opr2chunks[i - 1] == opr2chunks[i]:
            continue
        max_num = 0
        opr2chunks[i] = sorted(
            opr2chunks[i],
            key=lambda chunk_id: all_chunks[chunk_id]["size"],
            reverse=True,
        )
        writer.add_text(
            suffix_1 + "/" + "<SUMMARY_INFO>",
            ["reached_max_opr_name:    " + all_oprs[str(max_size_oprs[i])]["name"]],
            0,
        )
        writer.add_text(
            suffix_1 + "/" + "<SUMMARY_INFO>",
            ["max_used_size:    " + sizeof_fmt(max_size)],
            1,
        )

        for j in opr2chunks[i]:
            suffix_2 = "MAX" + str(max_num)
            j_size = sizeof_fmt(all_chunks[j]["size"])
            j_percent = round(all_chunks[j]["size"] / max_size * 100, 3)

            writer.add_text(
                suffix_1 + "/" + suffix_2 + "_OPR",
                ["percent:    " + str(j_percent) + "%"],
                0,
            )
            writer.add_text(
                suffix_1 + "/" + suffix_2 + "_OPR", ["memory_size:    " + j_size], 1,
            )
            writer.add_text(
                suffix_1 + "/" + suffix_2 + "_OPR",
                ["owner_opr:    " + all_chunks[j]["owner_opr"]],
                2,
            )

            writer.add_node_raw_attributes(
                all_chunks[j]["owner_opr"],
                {
                    "memory_" + all_chunks[j]["id"]: j_size,
                    "memory_percent": str(j_percent) + "%",
                    "summary_memory_" + str(peak_num): sizeof_fmt(max_size),
                },
            )
            writer.add_node_raw_name_suffix(
                all_chunks[j]["owner_opr"], "_" + suffix_1 + "_" + suffix_2
            )
            max_num += 1
        peak_num += 1

    writer.add_graph_by_node_raw_list()


 def convert(args):
    file_process_order = {
        "graph.json": comp_graph_plotter,
        "StaticMemoryInfo.json": peak_mem_regist,
    }
    g = os.walk(args.input)
    for path, dir_list, file_list in g:
        out_path = path.replace(args.input, args.output)
        writer = SummaryWriterExtend(out_path)
        for key, value in file_process_order.items():
            if key in file_list:
                value(os.path.join(path, key), writer)


 def main():
    """`graph_info_analyze.py` is uesed to convert json dumped by `VisableDataSet` 
    class to logs which can be read by python `tensorboard`.
    Now `get_static_memory_alloc_info()` support this feature,it will dump a dir 
    which can be convert by `graph_info_analyze.py`.
    Examples::
        graph_info_analyze.py -i <input_dir_name> -o <output_dir_name>
        tensorboard --logdir <output_dir_name>
    """
    parser = argparse.ArgumentParser(
        "convert json dumped by c to logs which can be read by python tensorboard",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-i", "--input", required=True, help="input dirctor name(c tensorboard info)"
    )
    parser.add_argument(
        "-o",
        "--output",
        required=True,
        help="output dirctor name(python tensorboard info)",
    )
    args = parser.parse_args()

    convert(args)


 if __name__ == "__main__":
    main()
--- a/src/core/impl/utils/visable_data_set.cpp
+++ b/src/core/impl/utils/visable_data_set.cpp
@@ -0,0 +1,131 @@
 /**
 * \file src/core/impl/utils/tensorboard.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

 #include "megbrain/utils/visable_data_set.h"
 #if MGB_ENABLE_JSON
 #include <fstream>
 #include <iostream>

 using namespace mgb;

 #if WIN32
 #include <direct.h>
 #include <fcntl.h>
 #include <io.h>
 #define getcwd _getcwd
 namespace {

 auto mkdir(const char* path, int) {
    return _mkdir(path);
 }

 }  // namespace
 #else
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 #endif

 namespace {
 #if defined(IOS)
 #pragma message "build test on iOS; need ios_get_mgb_output_dir() to be defined"
 extern "C" void ios_get_mgb_output_dir(char** dir);
 #endif

 std::string output_file(std::string dir_name, const std::string& fname) {
    static std::string cwd;
    static std::mutex cwd_mtx;
    MGB_LOCK_GUARD(cwd_mtx);
    if (cwd.empty()) {
 #if defined(IOS)
        char* buf = nullptr;
        ios_get_mgb_output_dir(&buf);
 #else
        auto buf = getcwd(nullptr, 0);
 #endif
        mgb_assert(buf);
        cwd = buf;
        free(buf);
        dir_name = dir_name + "/";
        for (size_t i = 0; i < dir_name.size(); i++) {
            size_t pos = dir_name.find("/", i);
            if (pos < dir_name.size() && pos - i > 1) {
                cwd.append("/" + dir_name.substr(i, pos - i));
                mkdir(cwd.c_str(), 0755);
                i = pos;
            }
        }
    }
    if (fname.empty())
        return cwd;
    auto ret = cwd + "/" + fname;
    FILE* fout = fopen(ret.c_str(), "w");
    mgb_assert(fout, "failed to open %s: %s", ret.c_str(), strerror(errno));
    fclose(fout);

    return ret;
 }
 }  // namespace

 void VisableDataSet::draw_graph(std::shared_ptr<json::Value> graph_json) {
    graph_json->writeto_fpath(output_file(m_logdir, "graph.json"));
 }

 void VisableDataSet::dump_info(Content& c) {
    auto&& content_set = m_file2content[c.file_name()];
    content_set.insert(c.content_name());
    auto&& value_list =
            m_filecontent2value[c.file_name() + "/" + c.content_name()];
    value_list.push_back(c.to_json());
 }

 void VisableDataSet::write_to_file() {
    for (auto& i : m_file2content) {
        auto f_objptr = json::Object::make();
        auto&& f_obj = *f_objptr;
        for (auto& c : i.second) {
            auto c_objptr = json::Object::make();
            auto&& c_obj = *c_objptr;
            for (auto& j : m_filecontent2value[i.first + "/" + c]) {
                c_obj[(*j).cast_final_safe<json::Object>()["id"]
                              ->cast_final_safe<json::String>()
                              .get_impl()] = j;
            }
            f_obj[c] = c_objptr;
        }
        f_objptr->writeto_fpath(output_file(m_logdir, i.first));
    }
 }

 // const std::string PeakMemoryInfo::content() const {return }

 std::shared_ptr<json::Value> Chunk::to_json() const {
    auto objptr = json::Object::make();
    auto&& obj = *objptr;
    obj["id"] = json::String::make(id());
    obj["type"] = json::String::make(m_type);
    obj["time_begin"] = json::String::make(m_time_begin);
    obj["time_end"] = json::String::make(m_time_end);
    obj["logic_addr_begin"] = json::String::make(m_logic_addr_begin);
    obj["logic_addr_end"] = json::String::make(m_logic_addr_end);
    obj["overwrite_dest_id"] = json::String::make(m_overwrite_dest_id);
    return objptr;
 }

 std::shared_ptr<json::Value> OprSeq::to_json() const {
    auto objptr = json::Object::make();
    auto&& obj = *objptr;
    obj["id"] = json::String::make(id());
    obj["name"] = json::String::make(m_name);
    return objptr;
 }
 #endif
--- a/src/core/include/megbrain/utils/visable_data_set.h
+++ b/src/core/include/megbrain/utils/visable_data_set.h
@@ -0,0 +1,88 @@
 /**
 * \file src/core/include/megbrain/utils/visable_data_set.h
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

 #pragma once
 #include "megbrain/utils/json.h"
 #if MGB_ENABLE_JSON
 #include <set>
 #include "megbrain/plugin/base.h"
 #include "megbrain/plugin/static_mem_record.h"

 namespace mgb {
 class VisableDataSet : public NonCopyableObj {
 private:
    const std::string m_logdir;
    std::unordered_map<std::string, std::set<std::string>> m_file2content;
    std::unordered_map<std::string, std::vector<std::shared_ptr<json::Value>>>
            m_filecontent2value;

 public:
    class Content {
    private:
        std::string m_file_name;
        std::string m_content_name;
        std::string m_id;

    public:
        Content(std::string file_name, std::string content_name, std::string id)
                : m_file_name(file_name),
                  m_content_name(content_name),
                  m_id(id) {}
        const std::string& file_name() const { return m_file_name; }
        const std::string& content_name() const { return m_content_name; }
        const std::string& id() const { return m_id; }
        virtual std::shared_ptr<json::Value> to_json() const = 0;
        virtual ~Content() = default;
    };
    VisableDataSet(std::string logdir) : m_logdir(logdir) {}

    void draw_graph(std::shared_ptr<json::Value> graph_json);

    void dump_info(Content& c);

    void write_to_file();
 };

 class Chunk : public VisableDataSet::Content {
 private:
    const char* enum_str[2] = {"static_mem", "weight_mem"};
    std::string m_type, m_time_begin, m_time_end, m_logic_addr_begin,
            m_logic_addr_end,
            m_overwrite_dest_id;  // m_overwriter_dest_id = "-1" means no
                                  // overwrite dest
 public:
    enum chunk_type { static_mem, weight_mem };

    Chunk(std::string id, chunk_type type, std::string time_begin,
          std::string time_end, std::string logic_addr_begin,
          std::string logic_addr_end, std::string overwrite_dest_id)
            : Content("StaticMemoryInfo.json", "chunk", id),
              m_type(enum_str[type]),
              m_time_begin(time_begin),
              m_time_end(time_end),
              m_logic_addr_begin(logic_addr_begin),
              m_logic_addr_end(logic_addr_end),
              m_overwrite_dest_id(overwrite_dest_id) {}
    std::shared_ptr<json::Value> to_json() const override;
 };

 class OprSeq : public VisableDataSet::Content {
 private:
    std::string m_id, m_name;

 public:
    OprSeq(std::string id, std::string opr_name)
            : Content("StaticMemoryInfo.json", "opr", id), m_name(opr_name) {}
    std::shared_ptr<json::Value> to_json() const override;
 };
 }  // namespace mgb
 #endif