You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

CMakeLists.txt 47 kB

5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108
  # Minimum CMake version; this has to come before project().
  cmake_minimum_required(VERSION 3.15.2)

  # Report the active generator and warn when it is anything other than Ninja.
  message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}" )
  # The expansion is quoted so it is always compared as a literal string: an
  # unquoted value would be re-interpreted by if() as a variable name, and an
  # empty value would make the condition malformed.
  if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
  endif()

  # Included before project() because project() consumes MGB_VER_STRING
  # (presumably defined by this module -- confirm in cmake/FetchMegBrainVersion.cmake).
  include(cmake/FetchMegBrainVersion.cmake)
  project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})

  # Project-wide C++ defaults: strict C++14 without compiler extensions, PIC everywhere.
  set(CMAKE_CXX_STANDARD 14)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)

  # Look for project-provided Find modules here.
  set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
  # Default CMP0048 (project() manages VERSION variables) to NEW for sub-projects.
  set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)

  # Outside MSVC/Windows/Apple, override the static-archive rules so that ar and
  # ranlib are invoked with the `D` modifier (deterministic archives in GNU binutils).
  if(NOT MSVC AND NOT APPLE AND NOT WIN32)
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
  endif()
  19. include(GNUInstallDirs)
  20. include(CheckCXXCompilerFlag)
  21. include(CheckIPOSupported)
  # Probe for -Wclass-memaccess; the result lands in CXX_SUPPORT_WCLASS_MEMACCESS.
  check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

  # Target architecture. AUTO is resolved later from the toolchain or host processor.
  set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
  # Choices offered by cmake-gui / ccmake for MGE_ARCH.
  set_property(
    CACHE MGE_ARCH
    PROPERTY STRINGS AUTO x86_64 i386 armv7 aarch64 naive fallback)

  # Name of the export set used for MegEngine targets.
  set(MGE_EXPORT_TARGETS MegEngine-targets)
  # ---- JIT back ends ----
  option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
  option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
  option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)

  # ---- Binary size reduction / profiling ----
  option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
  option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)

  # ---- Floating-point features ----
  option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
  option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)

  # ---- CUDA and related NVIDIA libraries ----
  option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
  option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
  option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
  option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
  option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
  option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
  option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)
  option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)

  # ---- Other accelerators and CPU libraries ----
  option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
  option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
  option(MGE_WITH_ROCM "Enable ROCM support" OFF)
  option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)

  # ---- Third-party handling and serialization ----
  option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
  option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)

  # ---- Language/runtime features ----
  option(MGE_ENABLE_RTTI "Build with RTTI" ON)
  option(MGE_ENABLE_LOGGING "Build with logging" ON)
  option(MGE_DEBUG_UTIL "Enable debug utility" ON)
  option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
  option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)

  # ---- What gets built ----
  option(BUILD_SHARED_LIBS "Build shared libraries" ON)
  option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
  option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
  option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
  option(MGE_BUILD_SDK "Build load_and_run" ON)
  option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
  # Windows builds always link cuDNN statically, whatever the option says.
  if(WIN32 OR MSVC)
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
  endif()

  # MGE_WITH_ANY_CUDA_STUB is ON as soon as either the CUDA or the NVRTC stub is requested.
  set(MGE_WITH_ANY_CUDA_STUB OFF)
  if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
  endif()
  # Binary-size reduction: when MGE_BIN_REDUCE is non-empty, its value is passed
  # to the C and C++ compilers via `-include` so it is force-included in every
  # translation unit.
  #
  # The expansion is quoted on purpose. Unquoted, an unset MGE_BIN_REDUCE leaves
  # `if(NOT STREQUAL "")`, which only evaluates to false by accident of how if()
  # parses its arguments.
  if(NOT "${MGE_BIN_REDUCE}" STREQUAL "")
    message(STATUS "build with BIN REDUCE")
    # MGE_WITH_MINIMUM_SIZE additionally strips RTTI, logging and exceptions
    # and forces an inference-only build.
    if(MGE_WITH_MINIMUM_SIZE)
      set(MGE_ENABLE_RTTI OFF)
      set(MGE_ENABLE_LOGGING OFF)
      set(MGE_ENABLE_EXCEPTIONS OFF)
      set(MGE_INFERENCE_ONLY ON)
    endif()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${MGE_BIN_REDUCE}")
  endif()
  # Section garbage collection: put every function/data item in its own section
  # and let the linker drop the unreferenced ones.
  if(NOT APPLE)
    # The probe is skipped on APPLE because running it there crashes CMake.
    check_cxx_compiler_flag("-ffunction-sections -fdata-sections -Wl,--gc-sections" CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
      foreach(compile_flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        string(APPEND ${compile_flags_var} " -ffunction-sections -fdata-sections")
      endforeach()
      foreach(link_flags_var CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
        string(APPEND ${link_flags_var} " -Wl,--gc-sections")
      endforeach()
    endif()
  endif()
  # Ask CMake whether the toolchain supports interprocedural optimization;
  # any diagnostic text goes to output_info.
  check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
  if(NOT IS_LTO_SUPPORT)
    message(STATUS "lto is not supported in this compiler")
  else()
    message(STATUS "lto is supported in this compiler")
    # NOTE(review): `-flto=full` looks like the Clang spelling -- confirm other
    # toolchains that pass the IPO check accept it.
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
  endif()
  # Midout profiling build: define MIDOUT_PROFILING for both C and C++.
  if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE")
    foreach(compile_flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
      string(APPEND ${compile_flags_var} " -DMIDOUT_PROFILING")
    endforeach()
  endif()

  # Apple builds are always static (needed for the Xcode framework).
  if(APPLE)
    set(BUILD_SHARED_LIBS OFF)
    message(STATUS "build static for xcode framework require")
  endif()

  # Using system libraries turns static CUDA linking off.
  if(MGE_USE_SYSTEM_LIB)
    set(MGE_CUDA_USE_STATIC OFF)
  endif()

  # FlatBuffers support enables FBS serialization.
  if(MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
  endif()
  # Cross compilation (detected by the presence of a toolchain file): point
  # flatc at the host-built binary and derive MGE_ARCH from the toolchain's
  # own variables.
  #
  # Every architecture variable is quoted in the comparisons below. Unquoted,
  # an unset ANDROID_ARCH / IOS_ARCH leaves `if( STREQUAL ...)`, which aborts
  # with an if() syntax error instead of reaching the FATAL_ERROR messages
  # written for that case.
  if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")

    if(ANDROID_TOOLCHAIN_ROOT)
      # ANDROID_ARCH_NAME, when provided, takes precedence over ANDROID_ARCH.
      if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
        set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
      endif()
      if("${ANDROID_ARCH}" STREQUAL "arm")
        set(MGE_ARCH "armv7")
      elseif("${ANDROID_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
      endif()
    elseif(IOS_TOOLCHAIN_ROOT)
      # armv7 / armv7k / armv7s all map to armv7; arm64 / arm64e map to aarch64.
      if("${IOS_ARCH}" STREQUAL "armv7"
         OR "${IOS_ARCH}" STREQUAL "armv7k"
         OR "${IOS_ARCH}" STREQUAL "armv7s")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" STREQUAL "arm64" OR "${IOS_ARCH}" STREQUAL "arm64e")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "Unsupported IOS_ARCH.")
      endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
      set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
      # Generic ARM cross build: the caller names the architecture directly.
      set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
      message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
  endif()
  # Resolve MGE_ARCH=AUTO from the processor CMake reports for the target system.
  #
  # The expansions are quoted so that an empty CMAKE_SYSTEM_PROCESSOR falls
  # through to the FATAL_ERROR below rather than producing a malformed if().
  if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
      set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
      set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
      # Any other processor name starting with "arm" is treated as armv7.
      set(MGE_ARCH "armv7")
    else()
      message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
  endif()
  # Single-config generators: default to RelWithDebInfo when no build type was given.
  if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
  endif()

  # Release builds without tests drop RTTI on every architecture except x86_64.
  #
  # CMAKE_BUILD_TYPE stays empty with multi-config generators (the default
  # above is skipped for them), so it must be quoted: unquoted it expands to
  # `if( STREQUAL "Release" ...)`, which is a configure-time error.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND NOT MGE_WITH_TEST AND NOT "${MGE_ARCH}" STREQUAL "x86_64")
    set(MGE_ENABLE_RTTI OFF)
    message(STATUS "disable MGE_ENABLE_RTTI when Release/NON-x86_64 mode!!")
  endif()
  # Platform-specific compiler flags.
  #   * Windows (clang-cl only): MSVC runtime selection, optional ASAN runtime
  #     linking, a long list of warning/definition flags, and several features
  #     that are switched off on this host.
  #   * Everything else: -Wall/-Wextra plus per-configuration optimization flags.
  #
  # Variable expansions used in if() are quoted so that an empty value (for
  # example CMAKE_BUILD_TYPE under a multi-config generator) yields a normal
  # false comparison instead of an if() syntax error.
  if(MSVC OR WIN32)
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
    set(CMAKE_OBJECT_PATH_MAX 300)
    # Static MSVC runtime; the debug flavour for Debug builds.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
    message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang-cl")
      message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()

    # On Windows, VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
    # and VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
    # need to be appended to the PATH environment variable.
    if(MGE_BUILD_WITH_ASAN)
      if("$ENV{VS_PATH}" STREQUAL "")
        message(FATAL_ERROR "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
      endif()

      # Pick the ASAN import library, runtime thunk and search suffix per architecture.
      if("${MGE_ARCH}" STREQUAL "x86_64")
        set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
        # NOTE(review): unlike the i386 branch, this name has no ".lib" suffix -- confirm intended.
        set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-x86_64")
        set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
      elseif("${MGE_ARCH}" STREQUAL "i386")
        set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
        set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
        set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
      else()
        message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
      endif()

      find_path(ASAN_DLL_PATH
        NAMES ${WINDOWS_ASAN_DLL_NAME}
        HINTS $ENV{VS_PATH}
        PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
        DOC "Windows asan library path" )
      if(ASAN_DLL_PATH STREQUAL "ASAN_DLL_PATH-NOTFOUND")
        message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
      endif()
      message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")

      # Directory-scoped on purpose: every target defined after this point links the ASAN runtime.
      link_directories(${ASAN_DLL_PATH})
      link_libraries(${WINDOWS_ASAN_DLL_NAME})
      link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
      set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
    else()
      set(WIN_FLAGS "/O2")
    endif()

    # Enable SSE instruction optimization on x86 and expose the AVX headers so AVX code compiles.
    set(WIN_FLAGS "${WIN_FLAGS} -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
    # If your CPU is from the Cascade Lake series, you can enable these for performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # Warning downgrades and definitions needed for the Windows build.
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break")
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819")
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")

    # FIXME: fix halide JIT on windows
    message(STATUS "disable jit, halide and mlir on windows host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
    # FIXME: fix MegRay on windows
    message(STATUS "Disable distributed build on windows host build...")
    set(MGE_WITH_DISTRIBUTED OFF)
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")

    # Non-Windows Debug flags.
    if(MGE_BUILD_WITH_ASAN)
      set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
      set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
    else()
      set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
      set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    endif()

    # Non-Windows optimized flags, shared by Release and RelWithDebInfo.
    if(MGE_BUILD_WITH_ASAN)
      set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
    elseif(ANDROID)
      set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
    else()
      set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
    endif()
    set(CMAKE_C_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")

    # Some GNU (gcc) compilers have runtime issues with -static-libasan, and
    # with large targets clang's linker takes a very long time with
    # -static-libsan, so the dynamic ASAN runtime is used by default.
    # On Android the ASAN shared library depends on liblog, so `log` is linked
    # into every target that MegEngine depends on (for example flatc).
    if(MGE_BUILD_WITH_ASAN AND ANDROID)
      link_libraries(log)
    endif()
  endif()
  # Statically linking cuDNN 8.0.0 or newer forces large-archive link support on.
  if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND NOT MGE_WITH_CUDNN_SHARED
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
      message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
      set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
  endif()

  # Linker choice: large archives use -mcmodel=large; otherwise prefer gold
  # where the compiler accepts it and the platform is not Android/Apple/Windows.
  check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
  if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
  elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
  endif()

  # Apply the common linker flags to shared libraries, modules and executables.
  foreach(link_flags_var
          CMAKE_SHARED_LINKER_FLAGS
          CMAKE_MODULE_LINKER_FLAGS
          CMAKE_EXE_LINKER_FLAGS)
    string(APPEND ${link_flags_var} " ${MGE_COMMON_LINKER_FLAGS}")
  endforeach()
  # Halide and MLIR are JIT back ends: with JIT off, switch each of them off with a warning.
  if(MGE_WITH_HALIDE AND NOT MGE_WITH_JIT)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(MGE_WITH_JIT_MLIR AND NOT MGE_WITH_JIT)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # FIXME: the LLVM that Halide depends on currently conflicts with the LLVM
  # that MLIR depends on. This should be fixed in a later version. Until then
  # Halide cannot be combined with the imperative runtime or with the MLIR JIT.
  if(MGE_WITH_HALIDE)
    if(MGE_BUILD_IMPERATIVE_RT)
      message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
    endif()
    if(MGE_WITH_JIT_MLIR)
      message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
    endif()
  endif()
  # Locate nvcc and enable the CUDA language with C++14.
  if(MGE_WITH_CUDA)
    # FIXME: check_language(CUDA) fails in sbsa mode!
    # Details: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    if(CMAKE_TOOLCHAIN_FILE)
      set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
      message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

    include(CheckLanguage)
    check_language(CUDA)

    if(NOT CMAKE_CUDA_COMPILER)
      if(CMAKE_TOOLCHAIN_FILE)
        # Cross builds fall back to plain `nvcc`; remove once CMake fixes nvcc sbsa detection.
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
      else()
        message(FATAL_ERROR "CUDA compiler not found in PATH")
      endif()
    endif()

    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  endif()
  # Without CUDA: JIT is kept only on x86 (x86_64 / i386), and Halide and
  # TensorRT are always switched off.
  if(NOT MGE_WITH_CUDA)
    if(NOT (MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386"))
      message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
      set(MGE_WITH_JIT OFF)
      set(MGE_WITH_JIT_MLIR OFF)
    endif()
    set(MGE_WITH_HALIDE OFF)
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
  endif()
  find_package(PythonInterp 3 REQUIRED)

  # PYTHON3_EXECUTABLE_WITHOUT_VERSION is only for targets that do not depend
  # on the Python API. Using the bare `python3` name lets their build outputs
  # be reused when the Python 3 version is switched. If `python3` is not found
  # on PATH, it falls back to PYTHON_EXECUTABLE.
  set(PYTHON3_IN_ENV "python3")
  find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  if(NOT PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
  else()
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    # Store the bare command name rather than the resolved path.
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  endif()
  # Prefer the -pthread flag over -lpthread when locating the thread library.
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads)

  # nvcc does not take -pthread directly, so when that is the thread flag and
  # CUDA is enabled, forward it via -Xcompiler for CUDA sources and pass it
  # unchanged for every other language.
  if(MGE_WITH_CUDA AND "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread")
    set_target_properties(Threads::Threads PROPERTIES
      INTERFACE_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>;$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
  endif()
  # BLAS back end selection; MKL is the default, OpenBLAS the alternative.
  set(MGE_BLAS
      MKL
      CACHE STRING "BLAS implementaion used by MegEngine.")
  set_property(
    CACHE MGE_BLAS
    PROPERTY STRINGS MKL OpenBLAS)

  # Optional user override for the nvcc -gencode arguments; empty means use the defaults.
  set(MGE_CUDA_GENCODE
      ""
      CACHE STRING "Overwrite -gencode specifications for CUDA")
  # Default the CUDA host compiler to the C++ compiler when none was chosen.
  # CMake variable references are written `${...}`; the Make-style `$(...)`
  # form is not expanded and would store the literal text
  # "$(CMAKE_CXX_COMPILER)" instead of the compiler path.
  # NOTE(review): CMake documents CMAKE_CUDA_HOST_COMPILER as something to set
  # before CUDA is first enabled -- confirm this late default has the intended effect.
  if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
  endif()
  # Translate the RTTI / exceptions options into C++ compiler flags.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
  endif()

  # Tests pull in the gtest setup.
  if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
  endif()

  # Building the imperative runtime raises the C++ standard to 17.
  if(MGE_BUILD_IMPERATIVE_RT)
    set(CMAKE_CXX_STANDARD 17)
  endif()
  # Distributed support is switched off when neither CUDA nor ROCm is enabled.
  if(NOT (MGE_WITH_CUDA OR MGE_WITH_ROCM))
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
  endif()

  # Inference-only builds drop distributed support and the imperative_rt Python module.
  if(MGE_INFERENCE_ONLY)
    message(STATUS "Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)
    message(STATUS "Disable imperative_rt python module for inference only build.")
    set(MGE_BUILD_IMPERATIVE_RT OFF)
  endif()
  # Third-party setup scripts, each included only when a feature that uses it is enabled.

  # LLVM: used by the MLIR JIT and by the imperative runtime.
  if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    include(cmake/llvm-project.cmake)
  endif()

  # protobuf and ZeroMQ: used by distributed support.
  if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
  endif()

  # FlatBuffers: used for serialization.
  if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
  endif()
  404. if(MGE_WITH_CUDA)
  405. include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
  406. foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
  407. get_filename_component(_NAME ${path} NAME)
  408. if(NOT ${_NAME} STREQUAL "stubs")
  409. list(APPEND CUDA_LINK_DIRECTORIES ${path})
  410. endif()
  411. endforeach()
  412. link_directories(${CUDA_LINK_DIRECTORIES})
  413. set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
  414. set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
  415. set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
  416. set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
  417. if(MSVC OR WIN32)
  418. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
  419. set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
  420. if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
  421. set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
  422. endif()
  423. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
  424. else()
  425. set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
  426. endif()
  427. if(NOT MGE_ENABLE_RTTI)
  428. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
  429. endif()
  430. if(NOT MGE_ENABLE_EXCEPTIONS)
  431. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
  432. endif()
  433. if(NOT MGE_CUDA_GENCODE)
  434. if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "aarch64")
  435. set(MEGDNN_THREADS_512 0)
  436. if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
  437. message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
  438. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  439. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
  440. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
  441. elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.1.0")
  442. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  443. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
  444. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
  445. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
  446. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=sm_86")
  447. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=compute_86")
  448. elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.0.0")
  449. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  450. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
  451. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
  452. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
  453. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=compute_80")
  454. elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
  455. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
  456. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
  457. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  458. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
  459. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
  460. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=compute_75")
  461. elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
  462. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
  463. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
  464. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  465. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
  466. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=compute_70")
  467. else()
  468. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_35,code=sm_35")
  469. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
  470. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
  471. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
  472. set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=compute_61")
  473. endif()
  474. else()
  475. message(FATAL_ERROR "Unsupported CUDA host arch.")
  476. endif()
  477. else()
  478. set(MEGDNN_THREADS_512 1)
  479. endif()
  480. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
  481. if(MGE_WITH_TRT)
  482. include(cmake/tensorrt.cmake)
  483. endif()
  484. if(MGE_CUDA_USE_STATIC)
  485. if(MGE_WITH_TRT)
  486. if(MSVC OR WIN32)
  487. message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
  488. list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
  489. else()
  490. list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
  491. endif()
  492. if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
  493. message(STATUS "handle trt myelin lib after trt7")
  494. list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
  495. endif()
  496. endif()
  497. if("${CUDNN_VERSION}" STREQUAL "7.5.0")
  498. if(MSVC OR WIN32)
  499. message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
  500. list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
  501. else()
  502. message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
  503. list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
  504. endif()
  505. else()
  506. if(MSVC OR WIN32)
  507. message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
  508. list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
  509. else()
  510. list(APPEND MGE_CUDA_LIBS libcudnn)
  511. endif()
  512. endif()
  513. if(MSVC OR WIN32)
  514. list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
  515. else()
  516. list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
  517. endif()
  518. if(MSVC OR WIN32)
  519. list(APPEND MGE_CUDA_LIBS cublas.lib)
  520. else()
  521. if(MGE_WITH_CUBLAS_SHARED)
  522. list(APPEND MGE_CUDA_LIBS cublas)
  523. else()
  524. list(APPEND MGE_CUDA_LIBS cublas_static)
  525. endif()
  526. endif()
  527. if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
  528. if(MSVC OR WIN32)
  529. list(APPEND MGE_CUDA_LIBS cublasLt.lib)
  530. else()
  531. if(MGE_WITH_CUBLAS_SHARED)
  532. list(APPEND MGE_CUDA_LIBS cublasLt)
  533. else()
  534. list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
  535. endif()
  536. endif()
  537. endif()
  538. if((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") AND NOT MSVC AND NOT WIN32)
  539. # mark all symbols from liblapack_static.a as weak to avoid
  540. # duplicated definition with mkl
  541. find_library(
  542. LAPACK_STATIC_PATH lapack_static
  543. HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
  544. if(NOT LAPACK_STATIC_PATH)
  545. message(FATAL_ERROR "liblapack_static.a not found")
  546. endif()
  547. set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)
  548. # add a target that run objcopy
  549. add_custom_command(
  550. OUTPUT ${LAPACK_STATIC_COPY_PATH}
  551. COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
  552. VERBATIM)
  553. add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})
  554. # create a library named "lapack_static_weak"
  555. add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
  556. add_dependencies(lapack_static_weak lapack_static_weak_target)
  557. set_target_properties(
  558. lapack_static_weak PROPERTIES
  559. IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
  560. list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
  561. endif()
  562. else()
  563. if(MGE_WITH_TRT)
  564. list(APPEND MGE_CUDA_LIBS libnvinfer)
  565. if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
  566. message(STATUS "handle trt myelin lib after trt7")
  567. list(APPEND MGE_CUDA_LIBS libmyelin)
  568. endif()
  569. endif()
  570. list(APPEND MGE_CUDA_LIBS libcudnn)
  571. if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
  572. list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
  573. endif()
  574. list(APPEND MGE_CUDA_LIBS cudart)
  575. endif()
  576. if(NOT MGE_WITH_CUDA_STUB)
  577. if(MSVC OR WIN32)
  578. list(APPEND MGE_CUDA_LIBS cuda.lib)
  579. else()
  580. list(APPEND MGE_CUDA_LIBS cuda)
  581. endif()
  582. endif()
  583. if(NOT MGE_WITH_NVRTC_STUB)
  584. if(MSVC OR WIN32)
  585. list(APPEND MGE_CUDA_LIBS nvrtc.lib)
  586. else()
  587. list(APPEND MGE_CUDA_LIBS nvrtc)
  588. endif()
  589. endif()
  590. if(MGE_WITH_ANY_CUDA_STUB)
  591. add_subdirectory(dnn/cuda-stub)
  592. list(APPEND MGE_CUDA_LIBS cuda-stub)
  593. endif()
  594. if(MSVC OR WIN32)
  595. list(APPEND MGE_CUDA_LIBS nvrtc.lib)
  596. else()
  597. list(APPEND MGE_CUDA_LIBS nvToolsExt)
  598. endif()
  599. set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -lrt")
  600. if(UNIX)
  601. set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -ldl")
  602. endif()
  603. endif()
  ########### please add_subdirectory from here ###############
  # cpuinfo: on x86_64 / i386 / armv7 / aarch64 non-Apple builds, offer the
  # MGE_ENABLE_CPUINFO option; when it is on, define MGB_ENABLE_CPUINFO_CHECK
  # and include cmake/cpuinfo.cmake.
  # MGE_ARCH is referenced by name (not expanded) so that an empty or undefined
  # value simply makes the test false instead of breaking the if() syntax.
  if((MGE_ARCH STREQUAL "x86_64"
      OR MGE_ARCH STREQUAL "i386"
      OR MGE_ARCH STREQUAL "armv7"
      OR MGE_ARCH STREQUAL "aarch64")
     AND NOT APPLE)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
      message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
      add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
      include(cmake/cpuinfo.cmake)
    endif()
  endif()
  # Cambricon backend: add the Neuware SDK directories taken from
  # $ENV{NEUWARE_HOME}, derive the BANG compiler flags, include the cnrt /
  # cndev / cnml helper modules and record their libraries in
  # MGE_CAMBRICON_LIBS.
  if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    # Map the MLU product name onto the numeric value used for __BANG_ARCH__.
    # The variable is referenced by name so that an empty or undefined
    # MGE_MLU_ARCH reaches the FATAL_ERROR below instead of producing an
    # if() syntax error.
    if(MGE_MLU_ARCH STREQUAL "MLU100")
      set(BANG_ARCH "100")
    elseif(MGE_MLU_ARCH STREQUAL "MLU1h8")
      set(BANG_ARCH "110")
    elseif(MGE_MLU_ARCH STREQUAL "MLU220")
      set(BANG_ARCH "220")
    elseif(MGE_MLU_ARCH STREQUAL "MLU270")
      set(BANG_ARCH "270")
    elseif(MGE_MLU_ARCH STREQUAL "MLU290")
      set(BANG_ARCH "290")
    elseif(MGE_MLU_ARCH STREQUAL "MLU200")
      set(BANG_ARCH "200")
    else()
      message(FATAL_ERROR "Unsupported MLU arch.")
    endif()

    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} --bang-mlu-arch=${MGE_MLU_ARCH}")
    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -std=c++11 -Werror")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__BANG_ARCH__=${BANG_ARCH}")

    # Optimisation / debug flags per build type. An empty CMAKE_BUILD_TYPE
    # matches none of the branches and leaves BANG_CNCC_FLAGS untouched.
    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O0")
    elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
    elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O3")
    elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
  endif()
  # ROCm backend: all of its configuration lives in cmake/rocm.cmake.
  if(MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
  endif()

  # ATLAS backend: build the stub library in dnn/atlas-stub, record it in
  # MGE_ATLAS_LIBS and mirror the switch into MGB_ATLAS.
  if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
  endif()
  # Use ccache as the compiler launcher when the program can be found.
  find_program(CCACHE_BIN ccache)
  if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_BIN}")
    # The CUDA launcher is only set when running CMake 3.10 or newer.
    if(MGE_WITH_CUDA AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.10.0")
      message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
      set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_BIN}")
    endif()
  endif()
  # BLAS backend for x86 builds: include the helper module for the library
  # chosen through MGE_BLAS and record its target name in MGE_BLAS_LIBS.
  # Variables are referenced by name so that an empty MGE_BLAS ends up in the
  # "Unknown BLAS implementation" error rather than an if() syntax error.
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
      include(cmake/mkl.cmake)
      set(MGE_BLAS_LIBS libmkl)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
      include(cmake/OpenBLAS.cmake)
      set(MGE_BLAS_LIBS libopenblas)
    else()
      message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
  endif()
  # MKLDNN build: only on x86_64. MGE_ARCH is referenced by name so an empty
  # value makes the condition false instead of malformed.
  if(MGE_WITH_MKLDNN AND MGE_ARCH STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
  endif()
  # RTTI: MEGDNN_ENABLE_RTTI follows MGE_ENABLE_RTTI and MEGDNN_ENABLE_MANGLING
  # is its inverse.
  if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_RTTI 1)
    set(MEGDNN_ENABLE_MANGLING 0)
  else()
    set(MEGDNN_ENABLE_RTTI 0)
    set(MEGDNN_ENABLE_MANGLING 1)
  endif()
  set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})

  # Logging: one switch drives logging in both libraries as well as JSON.
  set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

  # Exception: mirror the switch and tell the user what disabling it means.
  set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
  set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
  if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
  endif()
  # Thread (Apple): set the thread-related variables by hand and switch the
  # JIT backends off. This runs BEFORE the Halide include below so that the
  # "disable jit, halide and mlir" decision really prevents cmake/Halide.cmake
  # from being pulled in on macOS hosts.
  if(APPLE)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    message(STATUS "disable jit, halide and mlir on macos host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # JIT: Halide is included only when both JIT and Halide are still enabled.
  if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
  endif()

  # Mirror the final JIT switches into the MGB_* variables.
  set(MGB_JIT ${MGE_WITH_JIT})
  set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
  set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
  # Allow a consumer to override the MGB_C_OPR_INIT_FUNC symbol: a non-empty
  # CUSTOM_C_OPR_INIT_FUNC is passed on as a compile definition.
  if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
    message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
    add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
  endif()
  # Windows: set the thread-related variables by hand.
  # NOTE(review): CMAKE_USE_PTHREADS_INIT is also set to 1 here - confirm that
  # this is intended on Windows.
  if(WIN32 OR MSVC)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
  endif()

  # MGB_HAVE_THREAD is set when a thread library flag is known, Win32 threads
  # are in use, or the target is Android.
  if(ANDROID OR CMAKE_USE_WIN32_THREADS_INIT OR CMAKE_THREAD_LIBS_INIT)
    set(MGB_HAVE_THREAD 1)
  endif()

  # Test builds use intra-op multi threads.
  if(MGE_WITH_TEST)
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
  endif()
  # CUDA / ROCM / CAMBRICON / ENFLAME: copy each MGE_WITH_<backend> switch into
  # the matching MGB_<backend> and MEGDNN_WITH_<backend> variables.
  foreach(mge_backend_name CUDA ROCM CAMBRICON ENFLAME)
    set(MGB_${mge_backend_name} ${MGE_WITH_${mge_backend_name}})
    set(MEGDNN_WITH_${mge_backend_name} ${MGE_WITH_${mge_backend_name}})
  endforeach()
  # Debug info: assertion locations and debug utilities are enabled for the
  # Debug and RelWithDebInfo build types only. CMAKE_BUILD_TYPE is referenced
  # by name so that an empty build type selects the "disabled" branch instead
  # of producing an if() syntax error.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
  else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
  endif()
  # TensorRT
  set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

  # Inference only: gradients stay enabled (and slim serving disabled) unless
  # MGE_INFERENCE_ONLY is set and tests are not being built.
  if(MGE_WITH_TEST OR NOT MGE_INFERENCE_ONLY)
    set(MGB_ENABLE_GRAD 1)
    set(MGB_BUILD_SLIM_SERVING 0)
  else()
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
  endif()

  # Distributed communication
  set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})
  # MGE_ARCH related flags

  # Enable Naive
  if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
  endif()

  # x86 family: record the BLAS flavour, the word size and, for non-MSVC
  # compilers, the matching -m64/-m32 and SSE flags.
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
      set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
      set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()

    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
      set(MEGDNN_X86_64 1)
      set(MEGDNN_64_BIT 1)
      if(NOT MSVC)
        string(APPEND CMAKE_CXX_FLAGS " -m64")
      endif()
    else()
      set(MEGDNN_X86_32 1)
      if(NOT MSVC)
        string(APPEND CMAKE_CXX_FLAGS " -m32")
      endif()
    endif()

    if(NOT MSVC)
      string(APPEND CMAKE_CXX_FLAGS " -msse4.2 -mfpmath=sse")
    endif()
  endif()
  # dotprod is not enabled by default on APPLE, cpuinfo has some problem on APPLE.
  # Elsewhere, when the C compiler is Clang, probe whether the C++ compiler
  # accepts -march=armv8.2-a+dotprod and set MGB_ENABLE_DOT if it does.
  # CMAKE_C_COMPILER_ID is referenced by name so an empty compiler id makes
  # the condition false instead of producing an if() syntax error.
  if(NOT APPLE AND CMAKE_C_COMPILER_ID STREQUAL "Clang")
    check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
    if(CXX_COMPILER_SUPPORT_DOT)
      message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
      set(MGB_ENABLE_DOT 1)
    endif()
  endif()
  # armv7: select the architecture flag and the NEON-related compiler options.
  if(MGE_ARCH STREQUAL "armv7")
    set(MEGDNN_ARMV7 1)
    set(MARCH "-march=armv7-a")
    if(ANDROID)
      string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables neon auto-vectorization (since neon
    # is not fully IEEE 754 compatible, GCC does not turn on neon
    # auto-vectorization by default).
    string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
  endif()
  # aarch64: 64-bit ARM defaults, with optional armv8.2 fp16 support.
  if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)
    if(MGE_ARMV8_2_FEATURE_FP16)
      message(STATUS "Enable fp16 feature support in armv8.2")
      # Test the variable by name: expanding it would turn an unset or empty
      # MGE_DISABLE_FLOAT16 into a bare "if(NOT)", which evaluates to false
      # and would silently leave MEGDNN_ENABLE_FP16_NEON unset.
      if(NOT MGE_DISABLE_FLOAT16)
        set(MEGDNN_ENABLE_FP16_NEON 1)
      endif()
      set(MARCH "-march=armv8.2-a+fp16")
    endif()
    if(MGE_WITH_CUDA)
      message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
      when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
      for save link time(14min->1min), you may open below flags if not deploy on\
      arm a53 platform, or just build release type!")
      # The flags the warning above refers to (kept disabled):
      #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
  endif()
  # riscv64: only the architecture markers are set; no -march flag is chosen.
  if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_64_BIT 1)
    set(MEGDNN_RISCV64 1)
  endif()

  # Append whatever -march value was selected (possibly none) to the C++ flags.
  string(APPEND CMAKE_CXX_FLAGS " ${MARCH}")

  set(MGE_VERSION_SCRIPT
      ${PROJECT_SOURCE_DIR}/src/version.ld
      CACHE INTERNAL "Path to linker version script")
  # Write out megbrain_build_config.h, which defines the macros needed by both
  # megbrain and dnn, then install it and descend into dnn.
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/src/megbrain_build_config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  add_subdirectory(dnn)
  # Generate megbrain/opr/param_defs.h from tools/param_defs/mgb_opr_param_defs.py
  # with dnn/scripts/gen_param_defs.py, and expose the output directory through
  # the mgb_opr_param_defs INTERFACE library.
  list(APPEND MGB_OPR_PARAM_DEFS_SRCS
       ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_SCRIPT
      ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
  file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

  add_custom_command(
    OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND
      ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${MGB_OPR_PARAM_DEFS_SCRIPT}
      ${MGB_OPR_PARAM_DEFS_SRCS}
      ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM)

  list(APPEND MGB_OPR_PARAM_DEFS_OUTS
       ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)
  install(
    FILES ${MGB_OPR_PARAM_DEFS_OUTS}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
  list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})

  # The custom target drives the generation; the INTERFACE library depends on
  # it and carries the include directories.
  add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
  add_library(mgb_opr_param_defs INTERFACE)
  target_include_directories(
    mgb_opr_param_defs
    INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
              $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>)
  add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
  install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
  # For MLIR JIT or the imperative runtime: generate param_defs.td and collect
  # the tablegen include directories and .td sources.
  if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

    # Configure-time concatenation: copy the dnn definitions into the genfiles
    # directory, then append the megbrain definitions to that copy.
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
         DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py
         CONTENTS)
    file(APPEND ${OPR_PARAM_DEFS_SRCS} ${CONTENTS})
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})

    add_custom_command(
      OUTPUT ${OPR_PARAM_DEFS_OUT}
      COMMAND
        ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
        ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
      DEPENDS
        ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
        ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py
        ${OPR_PARAM_DEFS_SCRIPT}
      VERBATIM)

    # mlir tblgen sources
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS
        ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
  endif()
  # Distributed builds: force MegRay's NCCL / RCCL options to follow the CUDA /
  # ROCm switches, then add the bundled MegRay.
  if(MGE_WITH_DISTRIBUTED)
    set(MEGRAY_WITH_NCCL
        ${MGE_WITH_CUDA}
        CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_RCCL
        ${MGE_WITH_ROCM}
        CACHE BOOL "Override MegRay option" FORCE)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
  endif()

  # Core sources.
  add_subdirectory(src)

  # Optional components.
  if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
  endif()

  if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
  endif()

  # The test directory is only added when RTTI is enabled as well.
  if(MGE_ENABLE_RTTI AND MGE_WITH_TEST)
    add_subdirectory(test)
  endif()
  # "develop" target: once _imperative_rt exists, create symlinks in the source
  # tree's python package that point at the built extension module and at the
  # generated version.py in the build tree.
  if(TARGET _imperative_rt)
    add_custom_target(
      develop
      COMMAND
        ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
      COMMAND
        ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
      DEPENDS _imperative_rt
      VERBATIM)
  endif()
  # Configure and install pkg-config.
  # Unlike the Config.cmake modules this is not relocatable (and not really
  # portable), because two dependencies ship no pkg-config description:
  # FlatBuffers and MKL-DNN.
  if(MGE_USE_SYSTEM_MKLDNN)
    set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
  endif()
  if(MGE_USE_SYSTEM_OPENBLAS)
    string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
  endif()
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/megengine.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    @ONLY)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
  # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
  # Otherwise generate and install MegEngineConfig.cmake, its version file and
  # the exported target set.
  if(NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)

    configure_package_config_file(
      cmake/MegEngineConfig.cmake.in
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
      INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR})
    write_basic_package_version_file(
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
      COMPATIBILITY SameMajorVersion)

    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(
      FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
            ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      DESTINATION ${MGE_INSTALL_CMAKEDIR})
  endif()
  # Windows: request the static runtime (/MT, /MTd for Debug) and rewrite any
  # /MD already present in the C / C++ flag variables to /MT, forcing the
  # rewritten value into the cache.
  if(WIN32 OR MSVC)
    add_compile_options(
      $<$<CONFIG:>:/MT>
      $<$<CONFIG:Debug>:/MTd>
      $<$<CONFIG:Release>:/MT>)

    foreach(CompilerFlag
            CMAKE_C_FLAGS
            CMAKE_C_FLAGS_DEBUG
            CMAKE_C_FLAGS_RELEASE
            CMAKE_C_FLAGS_MINSIZEREL
            CMAKE_C_FLAGS_RELWITHDEBINFO
            CMAKE_CXX_FLAGS
            CMAKE_CXX_FLAGS_DEBUG
            CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_MINSIZEREL
            CMAKE_CXX_FLAGS_RELWITHDEBINFO)
      # ${CompilerFlag} expands to the variable's name; if() then tests that
      # variable's value.
      if(${CompilerFlag} MATCHES "/MD")
        string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
        set(${CompilerFlag}
            "${${CompilerFlag}}"
            CACHE STRING "msvc compiler flags" FORCE)
        message(VERBOSE "MSVC flags: ${CompilerFlag}:${${CompilerFlag}}")
      endif()
    endforeach()
  endif()
  # MLIR JIT builds also build the mgb-opt and mgb-file-check tools.
  if(MGE_WITH_JIT_MLIR)
    foreach(mlir_tool_dir mgb-opt mgb-file-check)
      add_subdirectory(tools/mlir/${mlir_tool_dir})
    endforeach()
  endif()
  # Statically linking cuDNN >= 8.0.0 (CUDA on, static CUDA libs, cuDNN not
  # shared): emit the same warning three times.
  if(MGE_WITH_CUDA
     AND MGE_CUDA_USE_STATIC
     AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
     AND NOT MGE_WITH_CUDNN_SHARED)
    foreach(cudnn8_warning_round RANGE 1 3)
      message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
    endforeach()
  endif()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台。