You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

graph_info_analyze.py 7.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. #! /usr/bin/env python3
  2. # MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
  3. #
  4. # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
  5. #
  6. # Unless required by applicable law or agreed to in writing,
  7. # software distributed under the License is distributed on an
  8. # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  9. import argparse
  10. import json
  11. import math
  12. import os
  13. from megengine.utils.module_stats import sizeof_fmt
  14. from megengine.utils.tensorboard import SummaryWriterExtend
  15. def load_single_graph(fpath):
  16. with open(fpath) as fin:
  17. data = json.load(fin)
  18. for t in ["operator", "var"]:
  19. data[t] = {int(i): j for i, j in data[t].items()}
  20. gvars = data["var"]
  21. for oid, i in data["operator"].items():
  22. i["input"] = list(map(int, i["input"]))
  23. out = i["output"] = list(map(int, i["output"]))
  24. for j in out:
  25. gvars[j]["owner_opr"] = oid
  26. for var in data["var"].values():
  27. mp = var.get("mem_plan", None)
  28. if mp:
  29. var["shape"] = "{" + ",".join(map(str, mp["layout"]["shape"])) + "}"
  30. else:
  31. var["shape"] = "<?>"
  32. return data
  33. def comp_graph_plotter(input, writer):
  34. jgraph = load_single_graph(input)
  35. all_oprs = jgraph["operator"]
  36. all_vars = jgraph["var"]
  37. for i in all_oprs:
  38. opr = all_oprs[i]
  39. if opr["type"] == "ImmutableTensor":
  40. continue
  41. inputlist = []
  42. for var in opr["input"]:
  43. inpopr = all_oprs[all_vars[var]["owner_opr"]]
  44. if inpopr["type"] == "ImmutableTensor":
  45. continue
  46. inputlist.append(all_oprs[all_vars[var]["owner_opr"]]["name"])
  47. writer.add_node_raw(opr["name"], opr["type"], inputlist)
  48. writer.add_graph_by_node_raw_list()
  49. def load_mem_info(fpath):
  50. with open(fpath) as fin:
  51. data = json.load(fin)
  52. oprs = data["opr"]
  53. for oid, i in oprs.items():
  54. i["size"] = 0
  55. for oid, i in data["chunk"].items():
  56. i["size"] = int(i["logic_addr_end"]) - int(i["logic_addr_begin"])
  57. data["peak_memory"] = 0
  58. data["weight_memory"] = 0
  59. for oid, i in data["chunk"].items():
  60. if i["type"] == "static_mem":
  61. i["owner_opr"] = oprs[i["time_begin"]]["name"]
  62. life_begin = int(i["time_begin"])
  63. life_end = int(i["time_end"])
  64. if i["overwrite_dest_id"] != "-1":
  65. life_begin = life_begin + 1
  66. if data["peak_memory"] < int(i["logic_addr_end"]):
  67. data["peak_memory"] = int(i["logic_addr_end"])
  68. for j in range(life_begin, life_end):
  69. oprs[str(j)]["size"] = oprs[str(j)]["size"] + i["size"]
  70. elif i["type"] == "weight_mem":
  71. data["weight_memory"] += int(i["logic_addr_end"]) - int(
  72. i["logic_addr_begin"]
  73. )
  74. return data
  75. def peak_mem_regist(input, writer):
  76. jmem = load_mem_info(input)
  77. writer.add_text(
  78. "PEAK_MEMORY_SIZE",
  79. [sizeof_fmt(jmem["peak_memory"]) + "(" + str(jmem["peak_memory"]) + " B)"],
  80. )
  81. writer.add_text(
  82. "WEIGHT_MEMORY_SIZE",
  83. [sizeof_fmt(jmem["weight_memory"]) + "(" + str(jmem["weight_memory"]) + " B)"],
  84. )
  85. all_oprs = jmem["opr"]
  86. all_chunks = jmem["chunk"]
  87. max_size = 0
  88. max_size_oprs = []
  89. # get oprs that reach the max memory
  90. for oid, i in all_oprs.items():
  91. if i["size"] == max_size:
  92. max_size_oprs.append(int(i["id"]))
  93. elif i["size"] > max_size:
  94. max_size = i["size"]
  95. max_size_oprs.clear()
  96. max_size_oprs.append(int(i["id"]))
  97. # get component of chunks
  98. max_size_oprs.sort()
  99. opr2chunks = []
  100. num = len(max_size_oprs)
  101. for i in range(num):
  102. opr2chunks.append([])
  103. for oid, i in all_chunks.items():
  104. if i["type"] == "static_mem":
  105. life_begin = int(i["time_begin"])
  106. life_end = int(i["time_end"])
  107. if i["overwrite_dest_id"] != "-1":
  108. life_begin = life_begin + 1
  109. if max_size_oprs[0] >= life_end or max_size_oprs[-1] < life_begin:
  110. continue
  111. for j in range(num):
  112. if max_size_oprs[j] >= life_end:
  113. break
  114. elif max_size_oprs[j] >= life_begin:
  115. opr2chunks[j].append(i["id"])
  116. peak_num = 0
  117. for i in range(num):
  118. suffix_1 = "PEAK" + str(peak_num)
  119. if i - 1 > 0 and opr2chunks[i - 1] == opr2chunks[i]:
  120. continue
  121. max_num = 0
  122. opr2chunks[i] = sorted(
  123. opr2chunks[i],
  124. key=lambda chunk_id: all_chunks[chunk_id]["size"],
  125. reverse=True,
  126. )
  127. writer.add_text(
  128. suffix_1 + "/" + "<SUMMARY_INFO>",
  129. ["reached_max_opr_name: " + all_oprs[str(max_size_oprs[i])]["name"]],
  130. 0,
  131. )
  132. writer.add_text(
  133. suffix_1 + "/" + "<SUMMARY_INFO>",
  134. ["max_used_size: " + sizeof_fmt(max_size)],
  135. 1,
  136. )
  137. for j in opr2chunks[i]:
  138. suffix_2 = "MAX" + str(max_num)
  139. j_size = sizeof_fmt(all_chunks[j]["size"])
  140. j_percent = round(all_chunks[j]["size"] / max_size * 100, 3)
  141. writer.add_text(
  142. suffix_1 + "/" + suffix_2 + "_OPR",
  143. ["percent: " + str(j_percent) + "%"],
  144. 0,
  145. )
  146. writer.add_text(
  147. suffix_1 + "/" + suffix_2 + "_OPR", ["memory_size: " + j_size], 1,
  148. )
  149. writer.add_text(
  150. suffix_1 + "/" + suffix_2 + "_OPR",
  151. ["owner_opr: " + all_chunks[j]["owner_opr"]],
  152. 2,
  153. )
  154. writer.add_node_raw_attributes(
  155. all_chunks[j]["owner_opr"],
  156. {
  157. "memory_" + all_chunks[j]["id"]: j_size,
  158. "memory_percent": str(j_percent) + "%",
  159. "summary_memory_" + str(peak_num): sizeof_fmt(max_size),
  160. },
  161. )
  162. writer.add_node_raw_name_suffix(
  163. all_chunks[j]["owner_opr"], "_" + suffix_1 + "_" + suffix_2
  164. )
  165. max_num += 1
  166. peak_num += 1
  167. writer.add_graph_by_node_raw_list()
  168. def convert(args):
  169. file_process_order = {
  170. "graph.json": comp_graph_plotter,
  171. "StaticMemoryInfo.json": peak_mem_regist,
  172. }
  173. g = os.walk(args.input)
  174. for path, dir_list, file_list in g:
  175. out_path = path.replace(args.input, args.output)
  176. writer = SummaryWriterExtend(out_path)
  177. for key, value in file_process_order.items():
  178. if key in file_list:
  179. value(os.path.join(path, key), writer)
  180. def main():
  181. """`graph_info_analyze.py` is uesed to convert json dumped by `VisableDataSet`
  182. class to logs which can be read by python `tensorboard`.
  183. Now `get_static_memory_alloc_info()` support this feature,it will dump a dir
  184. which can be convert by `graph_info_analyze.py`.
  185. Examples:
  186. .. code-block:: shell
  187. graph_info_analyze.py -i <input_dir_name> -o <output_dir_name>
  188. tensorboard --logdir <output_dir_name>
  189. """
  190. parser = argparse.ArgumentParser(
  191. "convert json dumped by c to logs which can be read by python tensorboard",
  192. formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  193. )
  194. parser.add_argument(
  195. "-i", "--input", required=True, help="input dirctor name(c tensorboard info)"
  196. )
  197. parser.add_argument(
  198. "-o",
  199. "--output",
  200. required=True,
  201. help="output dirctor name(python tensorboard info)",
  202. )
  203. args = parser.parse_args()
  204. convert(args)
  205. if __name__ == "__main__":
  206. main()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台