# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
"""Driver for the ResNet50 training performance script.

Runs the sibling ``resnet50_perf.py`` through ``python3`` with several
command-line configurations, appends everything to ``log.txt`` and finally
greps that log for a short summary.
"""
import argparse
import os
import pathlib
import subprocess

from megengine.utils.profile_analyze import main as profiler

home = pathlib.Path(__file__).parent.absolute()
script_path = os.path.join(str(home), "resnet50_perf.py")
# From here on ``script_path`` is a shell command prefix, not a bare path.
script_path = "python3 " + script_path
prof_path = "prof.json"
log_path = "log.txt"

# Visual separator written between benchmark sections in the log.
_SEPARATOR = "**************************************"


def print_log(msg: str, log: str = log_path):
    """Print ``msg`` to stdout and append it to the file ``log``."""
    print(msg)
    with open(log, "a") as f:
        print(msg, file=f)


def run_cmd(cmd: str, log: str = log_path) -> bool:
    """Run ``cmd`` through the shell and record its output.

    The run counts as finished when any output line contains ``"Wall time"``.
    Lines containing ``"Wall time"`` or ``"GPU Usage"`` are echoed to stdout,
    a ``Run status`` line is logged, and the complete captured output is
    appended to ``log``.

    :param cmd: shell command line to execute.
    :param log: path of the log file that receives status and output.
    :return: ``True`` if a ``"Wall time"`` line was seen, ``False`` otherwise.
    """
    output = subprocess.getoutput(cmd)
    run_finished = False
    for line in output.split("\n"):
        if "Wall time" in line:
            print(line)
            # Pass ``log`` through so status and output land in the same file.
            print_log("Run status: finished", log)
            run_finished = True
        if "GPU Usage" in line:
            print(line)
    if not run_finished:
        print_log("Run status: failed", log)
    # The full output is always appended: the final summary greps the log
    # for the "Wall time" / "GPU Usage" lines, which are only print()ed above.
    with open(log, "a") as f:
        print(output, file=f)
    return run_finished


def _section(title: str) -> None:
    """Log a separator line followed by the section ``title``."""
    print_log(_SEPARATOR)
    print_log(title)


def _run_static_benchmarks() -> None:
    """Run the static-graph configurations that are always executed."""
    _section("Run static graph with default opt level")
    run_cmd(script_path)

    _section("Run static graph with conv fastrun")
    run_cmd(script_path + " --conv-fastrun=yes")

    _section("Run static graph with conv fastrun and JIT")
    run_cmd(script_path + " --conv-fastrun=yes --opt-level=3")

    _section("Run static graph with JIT, conv fastrun and without running step")
    run_cmd(script_path + " --conv-fastrun=yes --opt-level=3 --run-step=no")


def _run_eager_benchmarks() -> None:
    """Run the small-batch static graph followed by the eager graph."""
    _section("Run static graph with default opt level and batch-size=8")
    run_cmd(script_path + " --batch-size=8")

    _section("Run eager graph with default opt level and batch-size=8")
    # NOTE(review): the message says batch-size=8 but no --batch-size flag is
    # passed here; presumably resnet50_perf.py picks it when --eager=yes is
    # given -- confirm against that script.
    run_cmd("MGE_DISABLE_TRACE=1 " + script_path + " --eager=yes")


def _run_debug_tools() -> None:
    """Run once with profile dumping and once under valgrind massif."""
    _section("Run with dump_prof")
    if run_cmd(script_path + " --dump-prof=" + prof_path):
        print("Printing profiling result")
        profiler([prof_path, "--aggregate-by=type", "--aggregate=sum", "-t 10"])

    _section("Run with valgrind massif")
    massif_out = "massif.out"
    # Use 0.01% as valgrind massif threshold
    # A smaller value reports more details but it may take longer time to analyze the log
    # Change it accordingly.
    mem_threshold = 0.01
    cmd = (
        "valgrind --tool=massif --threshold={} --massif-out-file=".format(
            mem_threshold
        )
        + massif_out
        + " "
    )
    run_cmd(cmd + script_path + " --warm-up=no --run-iter=20")

    ms_print_file = "massif.out.ms_print"
    os.system(
        "ms_print --threshold={} ".format(mem_threshold)
        + massif_out
        + " > "
        + ms_print_file
    )
    # Show only the head of the report; the rest stays in the file.
    os.system("head -n 33 " + ms_print_file)
    print_log("Read {} for detailed massif output".format(ms_print_file))


def _run_parallel_benchmarks() -> None:
    """Run the data-parallel configuration with 2, 4 and 8 GPUs."""
    print_log(_SEPARATOR)
    tmp_out = "/dev/null"
    # Change server and port to run at your system
    server = "localhost"
    port = "2222"
    for num_gpu in (2, 4, 8):
        print_log("Run with {} GPUs".format(num_gpu))
        cmd = script_path + " --num-gpu={} --server={} --port={} ".format(
            num_gpu, server, port
        )
        # Launch ranks num_gpu-1 .. 1 in the background with output discarded.
        for irank in range(num_gpu - 1, 0, -1):
            os.system(
                cmd
                + " --device={}".format(irank)
                + " 1>{} 2>{} &".format(tmp_out, tmp_out)
            )
        # The command without --device runs in the foreground and is the one
        # whose output is logged; stop trying larger GPU counts once it fails.
        if not run_cmd(cmd):
            break


def main() -> None:
    """Parse the command line, run the selected benchmarks, print a summary."""
    parser = argparse.ArgumentParser(description="ResNet50 train performance")
    parser.add_argument(
        "--run-debug-tool", action="store_true", help="run profiler and valgrind"
    )
    parser.add_argument(
        "--run-parallel", action="store_true", help="run data parallel performance"
    )
    # store_true (not store_false): the eager run is opt-in, matching the
    # flag name, its help text and the other two switches.
    parser.add_argument("--run-eager", action="store_true", help="run eager graph")
    args = parser.parse_args()

    # Start from an empty log file; everything below appends to it.
    with open(log_path, "w"):
        pass

    _section("Run ResNet 50 performance test with batch size = 64")
    _run_static_benchmarks()

    if args.run_eager:
        _run_eager_benchmarks()

    if args.run_debug_tool:
        _run_debug_tools()

    if args.run_parallel:
        _run_parallel_benchmarks()

    print_log(_SEPARATOR)
    print_log(_SEPARATOR)
    print("Finish run, summary:")
    # Raw string: ``\|`` is grep's alternation operator, not a Python escape.
    os.system(r'grep "Run with\|Wall time\|Run status\|Error\|GPU Usage" ' + log_path)


if __name__ == "__main__":
    main()