You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

example.sh 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. #!/bin/bash
  # Remember the caller's IFS so it can be restored at the end of the script,
  # then split on newlines only: the test functions keep one complete command
  # line per array element.
  OLD_IFS="${IFS}"
  IFS=$'\n'

  # Default model path followed by its --input option; appended verbatim to the
  # load_and_run command lines built by the test functions.
  TYPICAL_MODEL_DATD='model_source/resnet50_b1_float32_without_data.mge --input "data:input_data/resnet50_input.npy"'

  # Remote ARM device settings (changed by the -t and -w options).
  DEVICE_DESC=""
  WORK_DIR_PATH="."
  RUN_ARM_DEVICE="false"

  # Test group to execute (changed by the -e and -a options).
  RUN_TARGET="diff_model"

  # Mode switches set by -p / -b / -c and the matching "already done" markers.
  ONLY_PREPARE_MODEL="false"
  MODEL_PREAPRED="false"
  ONLY_BUILD="false"
  LAR_BUILT="false"
  CLEAN_ALL="false"

  # Every value accepted by the -e option.
  RUN_TARGETS=(
    "diff_model"
    "diff_device"
    "fast_run"
    "io"
    "layout"
    "optimize"
    "plugin"
    "all"
  )
  # Print the command-line help to stdout and terminate the script with
  # status -1 (reported by the shell as 255).
  function usage() {
    local -a help_lines=(
      "$0 args1 args2 .."
      "available args detail:"
      "-p : prepare example model "
      "-b : build load_and_run for x86/armv7/arm64 cpu and CUDA"
      "-t : set the ssh arm device "
      "-w : set the arm device workspace dir"
      "-c : clean all"
      "-a : run all test"
      "-e : set the running target for test (details use \"-e\" to see)"
      "-h : show usage"
    )
    printf '%s\n' "${help_lines[@]}"
    exit -1
  }
  # Translate the command-line options into the configuration globals.
  # Options taking a value: -e <target>, -w <remote work dir>, -t <ssh device>.
  while getopts "pbcahe:w:t:" arg; do
    case "${arg}" in
      p)
        echo "prepare model and input"
        ONLY_PREPARE_MODEL="true"
        ;;
      b)
        echo "run build"
        ONLY_BUILD="true"
        ;;
      c)
        echo "clean all directory generated by script"
        CLEAN_ALL="true"
        ;;
      a)
        echo "config RUN_TARGET=all"
        RUN_TARGET="all"
        ;;
      h)
        echo "show usage"
        usage
        ;;
      e)
        # Accept the value only when it is one of the known RUN_TARGETS.
        target_known="false"
        for candidate in "${RUN_TARGETS[@]}"; do
          [ "${candidate}" = "${OPTARG}" ] || continue
          echo "CONFIG BUILD RUN_TARGET to : $OPTARG"
          RUN_TARGET="${OPTARG}"
          target_known="true"
          break
        done
        if [ "${target_known}" != "true" ]; then
          # Unknown target: list the valid ones and abort.
          echo "ERR args for target (-e)"
          echo "available target usage :"
          for candidate in "${RUN_TARGETS[@]}"; do
            echo " -e ${candidate}"
          done
          exit -1
        fi
        ;;
      w)
        WORK_DIR_PATH="${OPTARG}"
        echo "config arm device WORK_DIR_PATH to ${WORK_DIR_PATH}"
        ;;
      t)
        # Naming a device also switches on the remote ARM runs.
        DEVICE_DESC="${OPTARG}"
        RUN_ARM_DEVICE="true"
        echo "config arm device DEVICE_DESC to ${DEVICE_DESC}"
        ;;
      *)
        # getopts reports invalid options by setting arg to '?'.
        echo "unkonw argument"
        usage
        ;;
    esac
  done
  # Regenerate the example models, their input data and the scratch directory.
  # model_source, input_data and tmpdir are deleted and recreated; the helper
  # scripts are invoked relative to the current directory.
  function prepare_model_and_data(){
    local model_dir="model_source"
    local input_dir="input_data"
    local scratch_dir="tmpdir"

    rm -rf "${model_dir}" && mkdir "${model_dir}"

    # Dump the mgb model, then attach a random test case to it.
    python3 script/resnet50_mgb.py -o "${model_dir}/resnet50.pkl"
    ../dump_with_testcase.py "${model_dir}/resnet50.pkl" \
      -o "${model_dir}/resnet50_with_data.mgb" -d "#rand(0, 255)" --no-assert

    # Small add / conv demo models.
    python3 script/add_demo.py --dir "${model_dir}"
    python3 script/conv_demo.py --dir "${model_dir}"

    # TensorRT model.
    script/gen_trt_model.sh

    # mge models: default, uint8, and one with random inputs.
    python3 script/resnet50_mge.py --dir "${model_dir}"
    python3 script/resnet50_mge.py --dir "${model_dir}" -d uint8
    python3 script/resnet50_mge.py --dir "${model_dir}" --inputs "#rand(0,255)"

    # Input data consumed by the tests.
    rm -rf "${input_dir}" && mkdir "${input_dir}"
    python3 script/mge_input_data.py

    # Empty scratch directory for files the tests write.
    rm -rf "${scratch_dir}" && mkdir "${scratch_dir}"
  }
  # Build load_and_run for the host (CPU and CUDA) and for Android ARM, then
  # expose the four binaries in the current directory as lar_cpu, lar_cuda,
  # lar_arm64 and lar_armv7.
  # WARNING: the CUDA environment must be configured before the CUDA build.
  function build_lar(){
    local script_dir="../../../scripts/cmake-build"
    local build_root="../../../build_dir"

    # Host builds: CPU first, then CUDA (-c).
    "${script_dir}/host_build.sh" -r -t -e load_and_run
    "${script_dir}/host_build.sh" -c -t -e load_and_run

    # Android ARM cross builds.
    "${script_dir}/cross_build_android_arm_inference.sh" -r -a arm64-v8a -e load_and_run
    "${script_dir}/cross_build_android_arm_inference.sh" -r -a armeabi-v7a -e load_and_run

    # Link the host binaries and copy the ARM binaries for the tests.
    # -f/-n replace links left by an earlier build; plain `ln -s` failed with
    # "File exists" whenever the build step was run a second time.
    ln -sfn "${build_root}/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release//build/lite/load_and_run/load_and_run" lar_cpu
    ln -sfn "${build_root}/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_OFF/Release//build/lite/load_and_run/load_and_run" lar_cuda
    cp "${build_root}/android/arm64-v8a/Release/build/lite/load_and_run/load_and_run" ./lar_arm64
    cp "${build_root}/android/armeabi-v7a/Release/build/lite/load_and_run/load_and_run" ./lar_armv7
  }
  # Record the remote ARM device settings in the globals and rsync the ARM
  # binaries, the default model and its input to the device work directory.
  # Arguments: $1 - ssh destination of the device
  #            $2 - work directory on the device
  #            $3 - value stored in RUN_ARM_DEVICE
  function set_arm_device_and_upload(){
    DEVICE_DESC="${1}"
    WORK_DIR_PATH="${2}"
    RUN_ARM_DEVICE="${3}"

    local payload="./lar_arm64 ./lar_armv7"
    payload+=" model_source/resnet50_b1_float32_without_data.mge"
    payload+=" input_data/resnet50_input.npy"

    # Show the exact command before a child shell runs it.
    cmd="rsync -aP -zz ${payload} ${DEVICE_DESC}:${WORK_DIR_PATH}/"
    echo "${cmd}"
    bash -c "${cmd}"
  }
  # Run load_and_run (CPU build) on each supported model flavour: an mgb model
  # with embedded data, an mge model with embedded data, and the default model
  # with external input, the latter also with --lite.
  # Globals: TYPICAL_MODEL_DATD (read)
  function test_different_model(){
    local -a commands=(
      "./lar_cpu model_source/resnet50_with_data.mgb"
      "./lar_cpu model_source/resnet50_b1_float32_with_data.mge"
      "./lar_cpu ${TYPICAL_MODEL_DATD}"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --lite"
    )
    local cmd
    # Quoted expansion keeps every command line in one piece whatever IFS is
    # and prevents glob expansion of its contents.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done
  }
  # Run the default model with the different device / threading options of
  # load_and_run. The timings in the comments are the original author's notes.
  # Globals: TYPICAL_MODEL_DATD (read)
  function test_different_device(){
    local -a commands=(
      # On dispatch, compute tasks are pushed to a work queue that manages
      # their execution (m_asyc_exec runs asynchronously):
      # mean 131.278 ms, standard deviation 15.197 ms.
      "./lar_cpu ${TYPICAL_MODEL_DATD} --cpu"
      # On dispatch, compute tasks are executed directly (m_asyc_exec runs
      # synchronously): mean 131.875 ms, standard deviation 7.758 ms.
      "./lar_cpu ${TYPICAL_MODEL_DATD} --cpu-default"
      # Multi-threaded run; mean run time (ms) for 1~8 threads:
      # 129.611, 84.266, 76.963, 55.212, 69.283, 77.338, 58.386, 64.585
      "./lar_cpu ${TYPICAL_MODEL_DATD} --multithread 4"
      # Main thread pinned to a core, other tasks run on thread-pool threads:
      # 132.614, 83.095, 69.792, 54.452, 48.890, 48.206, 46.386, 53.908
      "./lar_cpu ${TYPICAL_MODEL_DATD} --multithread-default 4"
      # CPU multi-threading with core binding (little effect on x86).
      "./lar_cpu ${TYPICAL_MODEL_DATD} --multithread 2 --multi-thread-core-ids 1,5"
      # xpu set to run on the CPU: 132.740 ms, comp_node:cpu
      "./lar_cuda ${TYPICAL_MODEL_DATD} --cpu"
      # xpu set to run on CUDA: 6.495 ms, comp_node:gpu
      "./lar_cuda ${TYPICAL_MODEL_DATD} --cuda"
    )
    local cmd
    # Quoted expansion keeps every command line in one piece whatever IFS is.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done
  }
  # Run the default model on the CUDA build with the fast-run related options.
  # Globals: TYPICAL_MODEL_DATD (read)
  function test_fast_run(){
    local -a commands=(
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --full-run"
      # fast run: search for parameters
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run --fast-run-algo-policy tmpdir/algo_cache_file"
      # fast run: execute with the parameters
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run-algo-policy tmpdir/algo_cache_file"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run --reproducible"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run --fast-run-shared-batch-size 1"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --fast-run --binary-equal-between-batch"
    )
    local cmd
    # Quoted expansion keeps every command line in one piece whatever IFS is.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done
  }
  177. function test_io(){
  178. rm -rf tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_out_info_cuda tmpdir/io_info.txt
  179. mkdir tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_io_info_cuda
  180. CmdArray=("./lar_cpu $TYPICAL_MODEL_DATD")
  181. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"input_data/add_demo_input.json\"")
  182. #! the model must support input with nhwc shape
  183. CmdArray+=("./lar_cpu model_source/resnet50_b1_int8_without_data.mge --input \"data:input_data/cat.ppm\"")
  184. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\"")
  185. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump tmpdir/io_info.txt --iter 1 --warmup-iter 0")
  186. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump-stdout --iter 1 --warmup-iter 0")
  187. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump-stderr --iter 1 --warmup-iter 0")
  188. #different data in the given directory the name is the var id which is the same with txt-dump information
  189. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-io-dump tmpdir/bin_io_info --iter 1 --warmup-iter 0")
  190. CmdArray+=("./lar_cuda model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-io-dump tmpdir/bin_io_info_cuda --iter 1 --warmup-iter 0")
  191. CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-out-dump tmpdir/bin_out_info --iter 1 --warmup-iter 0")
  192. CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --copy-to-host")
  193. for cmd in ${CmdArray[@]}; do
  194. echo $cmd
  195. bash -c "$cmd"
  196. done
  197. #compare the binary io information
  198. python3 ../../../imperative/python/megengine/tools/compare_binary_iodump.py tmpdir/bin_io_info tmpdir/bin_io_info_cuda
  199. }
  # Run the default model with the layout options of load_and_run on the host
  # and, when RUN_ARM_DEVICE is "true", the nchw44 variants on the ARM device
  # over ssh. The speed remarks are the original author's notes.
  # Globals: TYPICAL_MODEL_DATD, RUN_ARM_DEVICE, DEVICE_DESC, WORK_DIR_PATH (read)
  function test_layout_related(){
    local global_cuda="model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cuda.mge"
    local global_cpu="model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cpu.mge"
    local -a commands=(
      # very little speed up
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-nchw4"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-chwn4"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-nchw32"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-nchw64"
      # speed up
      "./lar_cpu ${TYPICAL_MODEL_DATD} --enable-nchw88"
      # Layout transform on CUDA: run, dump the transformed model, run the dump.
      "./lar_cuda ${TYPICAL_MODEL_DATD} --cuda --layout-transform cuda"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --cuda --layout-transform cuda --layout-transform-dump ${global_cuda}"
      "./lar_cuda ${global_cuda}"
      # Same three steps on the CPU.
      "./lar_cpu ${TYPICAL_MODEL_DATD} --cpu --layout-transform cpu"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --cpu --layout-transform cpu --layout-transform-dump ${global_cpu}"
      "./lar_cpu ${global_cpu}"
    )
    local cmd
    # Quoted expansion keeps every command line in one piece whatever IFS is.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done

    if [[ "${RUN_ARM_DEVICE}" == "true" ]]; then
      local arm_model='resnet50_b1_float32_without_data.mge --input "data:resnet50_input.npy"'
      local -a arm_commands=(
        # speed up
        "./lar_arm64 ${arm_model} --cpu --enable-nchw44"
        # speed up
        "./lar_arm64 ${arm_model} --cpu --enable-nchw44-dot"
      )
      for cmd in "${arm_commands[@]}"; do
        echo "${cmd}"
        # The binary is started from the device work directory, with that
        # directory as the library search path.
        ssh -t "${DEVICE_DESC}" "unset LD_PRELOAD && cd $WORK_DIR_PATH && LD_LIBRARY_PATH=./ $cmd"
      done
    else
      echo "SET arm device ON : $RUN_ARM_DEVICE"
    fi
  }
  # Run load_and_run with its graph-optimization related options.
  # Globals: TYPICAL_MODEL_DATD (read)
  function test_optimize(){
    local trt_model="model_source/trt_conv_demo_with_data.mgb"
    local -a commands=(
      "./lar_cpu ${TYPICAL_MODEL_DATD} --enable-fuse-preprocess"
      # warm up speed up
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-fuse-conv-bias-nonlinearity"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable-fuse-conv-bias-with-z"
      "./lar_cuda ${trt_model} --tensorrt"
      "./lar_cuda ${trt_model} --tensorrt --tensorrt-cache tmpdir/TRT_cache"
      "./lar_cuda ${trt_model} --tensorrt-cache tmpdir/TRT_cache"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --no-sanity-check --record-comp-seq2"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --disable_mem_opt"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --workspace_limit 10000"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --fake-first"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --enable_jit "
    )
    local cmd
    # Quoted expansion keeps every command line in one piece whatever IFS is.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done
  }
  # Run load_and_run with its debugging / profiling plugin options.
  # Globals: TYPICAL_MODEL_DATD (read)
  function test_plugin(){
    rm -rf tmpdir/staticMemInfoDir tmpdir/staticMemInfoDirLogs
    mkdir -p tmpdir/staticMemInfoDir

    local add_demo='model_source/add_demo_f32_without_data.mge --input "data:[2,3,4]"'
    local -a commands=(
      "./lar_cpu ${add_demo} --check-dispatch"
      "./lar_cpu ${add_demo} --check-var-value 5:0"
      "./lar_cpu ${add_demo} --range 2"
      # This command string used to contain a stray line break before its
      # closing quote; it is a single line now.
      "./lar_cpu ${add_demo} --profile tmpdir/opr_profile.json"
      "./lar_cuda ${add_demo} --profile-host tmpdir/opr_profile_host.json"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --model-info"
      "./lar_cpu ${TYPICAL_MODEL_DATD} --verbose"
      "./lar_cpu model_source/resnet50_with_data.mgb --disable-assert-throw"
      # wait gdb attach to given PID
      # "./lar_cpu ${TYPICAL_MODEL_DATD} --wait-gdb"
      "./lar_cuda ${TYPICAL_MODEL_DATD} --get-static-mem-info tmpdir/staticMemInfoDir"
    )
    local cmd
    # Quoted expansion keeps every command line in one piece and stops the
    # shell from treating "[2,3,4]" as a glob pattern.
    for cmd in "${commands[@]}"; do
      echo "${cmd}"
      bash -c "${cmd}"
    done
    # view the graph with given url (usually: http://localhost:6006/)
    # mkdir tmpdir/staticMemInfoDirLogs && python3 ../../../imperative/python/megengine/tools/graph_info_analyze.py -i tmpdir/staticMemInfoDir -o tmpdir/staticMemInfoDirLogs
    # pip3 install tensorboard && tensorboard --logdir tmpdir/staticMemInfoDirLogs
  }
  # Delete everything this script generates in the current directory: the
  # scratch, model and input directories and the four load_and_run binaries.
  function clean(){
    local -a generated=(
      tmpdir
      model_source
      input_data
      lar_cpu
      lar_cuda
      lar_arm64
      lar_armv7
    )
    rm -rf "${generated[@]}"
  }
  # Succeeds when the configured RUN_TARGET is the given group name or "all".
  # Arguments: $1 - test group name
  function _target_selected(){
    [[ "${RUN_TARGET}" == "${1}" || "${RUN_TARGET}" == "all" ]]
  }

  # Script driver.
  # The clean (-c), prepare-only (-p) and build-only (-b) modes do their single
  # job and exit with status 0. Otherwise missing models and binaries are
  # generated, the files are uploaded to the ARM device when one is
  # configured, and the test groups selected by RUN_TARGET are run.
  # Globals: CLEAN_ALL, ONLY_PREPARE_MODEL, ONLY_BUILD, RUN_ARM_DEVICE,
  #          DEVICE_DESC, WORK_DIR_PATH, RUN_TARGET (read);
  #          MODEL_PREAPRED, LAR_BUILT (read and written)
  function main(){
    if [[ "${CLEAN_ALL}" == "true" ]]; then
      clean
      exit 0
    fi
    if [[ "${ONLY_PREPARE_MODEL}" == "true" ]]; then
      prepare_model_and_data
      MODEL_PREAPRED="true"
      exit 0
    fi
    if [[ "${ONLY_BUILD}" == "true" ]]; then
      build_lar
      LAR_BUILT="true"
      exit 0
    fi

    # Generate models and binaries that are not there yet. The emptiness
    # checks use -z on a quoted value: the former `[ ${VAR} == "" ]` collapsed
    # to `[ == "" ]` when find printed nothing, which is a test error, so the
    # automatic prepare/build step never ran.
    if [[ "${MODEL_PREAPRED}" != "true" ]]; then
      local found_model
      found_model=$(find . -name add_demo_input.json)
      if [[ -z "${found_model}" ]]; then
        prepare_model_and_data
        MODEL_PREAPRED="true"
      fi
    fi
    if [[ "${LAR_BUILT}" != "true" ]]; then
      local found_lar
      found_lar=$(find . -name lar_armv7)
      if [[ -z "${found_lar}" ]]; then
        build_lar
        LAR_BUILT="true"
      fi
    fi

    # Upload only after the steps above, so the binaries and the model being
    # copied are already there on a first run.
    if [[ "${RUN_ARM_DEVICE}" == "true" ]]; then
      set_arm_device_and_upload "${DEVICE_DESC}" "${WORK_DIR_PATH}" "true"
    fi

    # Run the selected group, or every group for "all", in a fixed order.
    if _target_selected "diff_model"; then
      test_different_model
    fi
    if _target_selected "diff_device"; then
      test_different_device
    fi
    if _target_selected "fast_run"; then
      test_fast_run
    fi
    if _target_selected "io"; then
      test_io
    fi
    if _target_selected "layout"; then
      test_layout_related
    fi
    if _target_selected "optimize"; then
      test_optimize
    fi
    if _target_selected "plugin"; then
      test_plugin
    fi
  }
  # Run the driver, then put back the field separator saved at the top of the
  # script.
  main
  IFS="${OLD_IFS}"