GitOrigin-RevId: d7b3a79283
release-1.10
@@ -188,20 +188,24 @@ if(MGE_WITH_CUDA) | |||||
gen_cutlass_kimpl(dwconv2d_dgrad tensorop884 CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_dgrad tensorop884 CUTLASS_SOURCES) | ||||
gen_cutlass_kimpl(dwconv2d_wgrad simt CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_wgrad simt CUTLASS_SOURCES) | ||||
gen_cutlass_kimpl(dwconv2d_wgrad tensorop884 CUTLASS_SOURCES) | gen_cutlass_kimpl(dwconv2d_wgrad tensorop884 CUTLASS_SOURCES) | ||||
# Compile the following opr first | |||||
list(PREPEND CUSOURCES ${CUTLASS_SOURCES}) | |||||
# Compile the files matched by priority_compile_opr.txt first. That file is generated by
# ../../scripts/cmake-build/utils/adjust_compile_opr/sort_compile_time_map.py.
file( | file( | ||||
GLOB_RECURSE | |||||
PRIORITY_DIRS | |||||
cuda/type_cvt/*cu | |||||
cuda/conv_bias/*cu | |||||
cuda/reduce/*cu | |||||
cuda/relayout/*cu | |||||
cuda/relayout_format/*cu | |||||
cuda/powc/*cu | |||||
cuda/warp_perspective/*cu) | |||||
list(PREPEND PRIORITY_DIRS ${CUTLASS_SOURCES}) | |||||
list(PREPEND CUSOURCES ${PRIORITY_DIRS}) | |||||
list(REMOVE_DUPLICATES CUSOURCES) | |||||
STRINGS | |||||
${CMAKE_CURRENT_SOURCE_DIR}/../../scripts/cmake-build/utils/adjust_compile_opr/priority_compile_opr.txt | |||||
PRIORITY_FILES_TMPS) | |||||
# Move every entry of CUSOURCES whose path contains one of the strings in
# PRIORITY_FILES_TMPS to the front of CUSOURCES. Matched sources keep the order
# of the entries in PRIORITY_FILES_TMPS; a source matching several entries is
# taken by the first one, because it is removed from CUSOURCES as soon as it
# is matched.
#
# Start from an empty accumulator so that a PRIORITY_FILES value left over from
# an enclosing scope cannot leak into the result.
set(PRIORITY_FILES)
foreach(PRIORITY_FILES_TMP IN LISTS PRIORITY_FILES_TMPS)
  # An empty pattern would be found at offset 0 of every path; skip it.
  if("${PRIORITY_FILES_TMP}" STREQUAL "")
    continue()
  endif()
  # foreach() takes a snapshot of CUSOURCES when the loop starts, so removing
  # items from the list inside the body is safe.
  foreach(CUSOURCE IN LISTS CUSOURCES)
    string(FIND "${CUSOURCE}" "${PRIORITY_FILES_TMP}" PRIORITY_FILES_FOUND)
    if(NOT PRIORITY_FILES_FOUND EQUAL -1)
      list(APPEND PRIORITY_FILES "${CUSOURCE}")
      list(REMOVE_ITEM CUSOURCES "${CUSOURCE}")
    endif()
  endforeach()
endforeach()
list(PREPEND CUSOURCES ${PRIORITY_FILES})
list(PREPEND SOURCES ${CUSOURCES}) | list(PREPEND SOURCES ${CUSOURCES}) | ||||
endif() | endif() | ||||
@@ -0,0 +1,9 @@ | |||||
# The main purpose of this folder is to adjust the compilation order of megdnn to improve the overall compilation time. If you add a new opr and find that it significantly slows down compilation, you can manually add the opr to the front of priority_compile_opr.txt. If you add several oprs, you can instead regenerate the list by running: python3 sort_compile_time_map.py. However, this operation is very time-consuming because it performs a single-threaded compilation.
- priority_compile_opr.txt: Compile order sorted by compile time | |||||
- sort_compile_time_map.py: Script that measures per-file compile time and regenerates priority_compile_opr.txt
## Usage | |||||
```bash | |||||
python3 sort_compile_time_map.py | |||||
``` | |||||
@@ -0,0 +1,46 @@ | |||||
cuda/type_cvt | |||||
cuda/cutlass | |||||
cuda/conv_bias | |||||
cuda/reduce | |||||
cuda/relayout | |||||
cuda/add_update | |||||
cuda/relayout_format | |||||
cuda/elemwise | |||||
cuda/argmxx | |||||
cuda/powc | |||||
cuda/elemwise_multi_type | |||||
cuda/cumsum | |||||
cuda/local_share | |||||
cuda/argsort | |||||
cuda/warp_perspective | |||||
cuda/local | |||||
cuda/batch_conv_bias | |||||
cuda/warp_affine | |||||
cuda/fake_quant | |||||
cuda/remap | |||||
cuda/cond_take | |||||
cuda/rng | |||||
cuda/tqt | |||||
cuda/resize | |||||
cuda/lsq | |||||
cuda/convolution3d | |||||
cuda/check_non_finite | |||||
cuda/indexing_one_hot | |||||
cuda/indexing_multi_axis_vec | |||||
cuda/convpooling | |||||
cuda/convolution | |||||
cuda/tile | |||||
cuda/padding | |||||
cuda/matrix_mul | |||||
cuda/gaussian_blur | |||||
cuda/flip | |||||
cuda/checksum | |||||
cuda/sliding_window_transpose | |||||
cuda/rotate | |||||
cuda/roi_pooling | |||||
cuda/roi_align | |||||
cuda/repeat | |||||
cuda/param_pack | |||||
cuda/linspace | |||||
cuda/layer_norm | |||||
cuda/images2neibs |
@@ -0,0 +1,41 @@ | |||||
# Get the top N files with the highest compilation time in megdnn, so that they can be compiled first | |||||
from click import command | |||||
import subprocess | |||||
import re | |||||
import tempfile | |||||
def topN_file(src_file_path,des_file_path,N):
    """Rank megdnn object files by compile time and write the slowest opr directories.

    The log at ``src_file_path`` is scanned for lines containing both
    ``"Building C"`` and ``"megdnn.dir"`` that are immediately followed by a
    line containing ``"Elapsed time: "``. The text between ``"Elapsed time: "``
    and ``" s. "`` on that following line is taken as the compile time.

    Args:
        src_file_path: path of the build log to parse.
        des_file_path: path of the file to (over)write, one key per line.
        N: number of slowest object files to consider.

    Returns:
        The list of unique ``"<dir>/<subdir>"`` keys, slowest first, in the
        same order as they are written to ``des_file_path``.

    Raises:
        ValueError: if an extracted elapsed time is not an integer literal.
    """
    with open(src_file_path) as src:
        lines = src.readlines()

    file_map = {}
    # Pair every line with its successor; the last line has none, so a
    # trailing "Building" line is ignored instead of indexing past the end.
    for line, next_line in zip(lines, lines[1:]):
        if "Building C" not in line or "megdnn.dir" not in line:
            continue
        if "Elapsed time: " not in next_line:
            continue
        key = line[line.find("megdnn.dir"):line.find(".o")]
        start = next_line.find("Elapsed time: ") + len("Elapsed time: ")
        file_map[key] = next_line[start:next_line.find(" s. ")]

    ranked = sorted(file_map.items(), key=lambda item: int(item[1]), reverse=True)

    result_opr = []
    # The destination is opened only after parsing succeeded, so a malformed
    # log does not leave an emptied output file behind.
    with open(des_file_path, "w") as des:
        for obj_path, _ in ranked[:N]:
            match = re.search(r"megdnn.dir\/(.*?)\/(.*?)[\.\/]", obj_path)
            if match is None:
                # Object directly under megdnn.dir: no "<dir>/<subdir>" key.
                continue
            key = '/'.join(match.groups())
            if key not in result_opr:
                result_opr.append(key)
                des.write(key + "\n")
    return result_opr
def compile(cmd:str,dir:str, failed_name=3):
    """Run ``cmd`` through the shell ``failed_name`` times, using ``dir`` as the working directory.

    Args:
        cmd: shell command line(s) passed to ``subprocess.run(..., shell=True)``.
        dir: directory the command is executed in.
        failed_name: number of times the command is executed (default 3).

    The command's exit status is not inspected, so it is re-run unconditionally.
    NOTE(review): the parameter name suggests a retry-on-failure count; confirm
    whether re-running after a successful build is intended.
    """
    for _ in range(failed_name):
        # Use the ``dir`` argument; previously this read a global named ``t``.
        subprocess.run(cmd, shell=True, cwd=dir)
if __name__ == '__main__':
    # Shell commands executed inside a temporary build directory: configure
    # with MGE_PROFILE_COMPILE_TIME enabled, then build megdnn with a single
    # job while copying the build output into megdnn_map_compile_time.txt.
    build_script = '''
cmake .. -DMGE_PROFILE_COMPILE_TIME=ON
time make -j1 megdnn | tee megdnn_map_compile_time.txt
'''
    top_n = 500
    output_path = "./priority_compile_opr.txt"
    # The temporary directory is created under ../../../../ relative to the
    # current working directory and is removed when the block exits, so the
    # log must be parsed before leaving the ``with`` statement.
    with tempfile.TemporaryDirectory(dir="../../../../") as t:
        compile(build_script, t)
        log_path = t + "/megdnn_map_compile_time.txt"
        topN_file(log_path, output_path, top_n)