From f7b039597649487b8b15c5c2736a0433202db404 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Fri, 13 May 2022 15:12:05 +0800 Subject: [PATCH] perf(mgb/compile): improve compile time according the file map of compile time GitOrigin-RevId: d7b3a792831d4e599105b6affb4db47ab1d3e4ca --- dnn/src/CMakeLists.txt | 30 ++++++++------ .../cmake-build/utils/adjust_compile_opr/README.md | 9 +++++ .../adjust_compile_opr/priority_compile_opr.txt | 46 ++++++++++++++++++++++ .../adjust_compile_opr/sort_compile_time_map.py | 41 +++++++++++++++++++ 4 files changed, 113 insertions(+), 13 deletions(-) create mode 100755 scripts/cmake-build/utils/adjust_compile_opr/README.md create mode 100644 scripts/cmake-build/utils/adjust_compile_opr/priority_compile_opr.txt create mode 100644 scripts/cmake-build/utils/adjust_compile_opr/sort_compile_time_map.py diff --git a/dnn/src/CMakeLists.txt b/dnn/src/CMakeLists.txt index 861d4d09..80cfc43e 100644 --- a/dnn/src/CMakeLists.txt +++ b/dnn/src/CMakeLists.txt @@ -188,20 +188,24 @@ if(MGE_WITH_CUDA) gen_cutlass_kimpl(dwconv2d_dgrad tensorop884 CUTLASS_SOURCES) gen_cutlass_kimpl(dwconv2d_wgrad simt CUTLASS_SOURCES) gen_cutlass_kimpl(dwconv2d_wgrad tensorop884 CUTLASS_SOURCES) - # Compile the following opr first + list(PREPEND CUSOURCES ${CUTLASS_SOURCES}) + + # Compile the following file first, the priority_compile_opr.txt is generated by + # ../../scripts/cmake-build/utils/adjust_compile_opr/sort_compile_time_map.py. 
file( - GLOB_RECURSE - PRIORITY_DIRS - cuda/type_cvt/*cu - cuda/conv_bias/*cu - cuda/reduce/*cu - cuda/relayout/*cu - cuda/relayout_format/*cu - cuda/powc/*cu - cuda/warp_perspective/*cu) - list(PREPEND PRIORITY_DIRS ${CUTLASS_SOURCES}) - list(PREPEND CUSOURCES ${PRIORITY_DIRS}) - list(REMOVE_DUPLICATES CUSOURCES) + STRINGS + ${CMAKE_CURRENT_SOURCE_DIR}/../../scripts/cmake-build/utils/adjust_compile_opr/priority_compile_opr.txt + PRIORITY_FILES_TMPS) + foreach(PRIORITY_FILES_TMP ${PRIORITY_FILES_TMPS}) + foreach(CUSOURCE ${CUSOURCES}) + string(FIND ${CUSOURCE} ${PRIORITY_FILES_TMP} PRIORITY_FILES_FOUND) + if(NOT ${PRIORITY_FILES_FOUND} EQUAL -1) + list(APPEND PRIORITY_FILES ${CUSOURCE}) + list(REMOVE_ITEM CUSOURCES ${CUSOURCE}) + endif() + endforeach(CUSOURCE) + endforeach(PRIORITY_FILES_TMP) + list(PREPEND CUSOURCES ${PRIORITY_FILES}) list(PREPEND SOURCES ${CUSOURCES}) endif() diff --git a/scripts/cmake-build/utils/adjust_compile_opr/README.md b/scripts/cmake-build/utils/adjust_compile_opr/README.md new file mode 100755 index 00000000..c01602ee --- /dev/null +++ b/scripts/cmake-build/utils/adjust_compile_opr/README.md @@ -0,0 +1,9 @@ +# The main purpose of this folder is to adjust the compilation order of megdnn to reduce the overall compilation time. If you add a new opr and find that it significantly slows down compilation, you can manually add the opr to the front of priority_compile_opr.txt. If you add several oprs, you can instead regenerate the list by running: python3 sort_compile_time_map.py. However, this is very time-consuming because it compiles megdnn with a single thread. 
# Get the top N files with the highest compilation time in megdnn, so that
# they can be compiled first.

import os
import re
import subprocess
import tempfile

# Captures the first two path components that follow "megdnn.dir/" in an
# object path; joined with "/" they form the opr key (e.g. "cuda/conv_bias").
_OPR_PATTERN = re.compile(r"megdnn.dir\/(.*?)\/(.*?)[\.\/]")
_ELAPSED_MARKER = "Elapsed time: "


def _parse_compile_times(lines):
    """Map each megdnn object path found in *lines* to its elapsed seconds.

    A line counts as a build line when it contains both "Building C" and
    "megdnn.dir"; its timing is read from the line directly after it, which
    must contain "Elapsed time: ". Build lines without such a follow-up line,
    or whose elapsed value is not a number, are ignored.
    """
    timings = {}
    # Pairing each line with its successor means the last line is never
    # used as a build line, so there is no out-of-range lookup.
    for line, following in zip(lines, lines[1:]):
        if "Building C" not in line or "megdnn.dir" not in line:
            continue
        if _ELAPSED_MARKER not in following:
            continue
        key_start = line.find("megdnn.dir")
        key = line[key_start:line.find(".o", key_start)]
        value_start = following.find(_ELAPSED_MARKER) + len(_ELAPSED_MARKER)
        value = following[value_start:following.find(" s. ", value_start)]
        try:
            timings[key] = float(value)
        except ValueError:
            # Unparseable timing: skip this object rather than abort the run.
            continue
    return timings


def topN_file(src_file_path, des_file_path, N):
    """Write the oprs owning the N slowest-compiling megdnn files.

    Args:
        src_file_path: Path of the build log to read.
        des_file_path: Path of the file to write, one opr key per line.
        N: Number of slowest files to consider before de-duplicating.

    Returns:
        The opr keys in descending order of compile time, without duplicates.
        Files whose path has no second component below "megdnn.dir/" cannot
        be mapped to an opr and are skipped.
    """
    with open(src_file_path) as src:
        lines = src.readlines()

    by_time = sorted(
        _parse_compile_times(lines).items(),
        key=lambda item: item[1],
        reverse=True,
    )

    result_opr = []
    for key, _seconds in by_time[:N]:
        match = _OPR_PATTERN.search(key)
        if match is None:
            continue
        opr = "/".join(match.groups())
        if opr not in result_opr:
            result_opr.append(opr)

    # The destination is opened only after the log was read successfully, so
    # a missing log does not truncate an existing priority file.
    with open(des_file_path, "w") as des:
        des.writelines(opr + "\n" for opr in result_opr)
    return result_opr


def compile(cmd: str, dir: str, failed_name=3):
    """Run the shell command(s) *cmd* in directory *dir*, *failed_name* times.

    Args:
        cmd: Command text handed to the shell (may span several lines).
        dir: Working directory for the command.
        failed_name: Number of times the command is executed.
    """
    # NOTE(review): the command is re-run unconditionally; the parameter name
    # suggests a retry-on-failure count, but the exit status is never checked.
    # Confirm the intent before changing this.
    for _ in range(failed_name):
        subprocess.run(cmd, shell=True, cwd=dir)


if __name__ == '__main__':
    cmd = '''
cmake .. -DMGE_PROFILE_COMPILE_TIME=ON
time make -j1 megdnn | tee megdnn_map_compile_time.txt
'''
    # Both paths below are relative to the current working directory.
    # NOTE(review): presumably the script is meant to be run from its own
    # folder so that "../../../../" is the repository root -- confirm.
    with tempfile.TemporaryDirectory(dir="../../../../") as t:
        compile(cmd, t)
        topN_file(
            os.path.join(t, "megdnn_map_compile_time.txt"),
            "./priority_compile_opr.txt",
            500,
        )