You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

CMakeLists.txt 48 kB

5 years ago

  cmake_minimum_required(VERSION 3.15.2)

  # Report the active generator. Ninja is the recommended one; any other
  # generator only produces a warning, the configure step still continues.
  message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}" )
  # The expansion is quoted so the generator name is always compared as a plain
  # string and is never re-interpreted as a variable name by if().
  if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
  endif()
  # The version helper is included before project() because project() consumes
  # MGB_VER_STRING (presumably defined by that helper -- confirm in
  # cmake/FetchMegBrainVersion.cmake).
  include(cmake/FetchMegBrainVersion.cmake)
  project(MegEngine
    LANGUAGES C CXX
    VERSION ${MGB_VER_STRING})

  # Project-wide C++ defaults: C++14, strictly required, no compiler extensions.
  set(CMAKE_CXX_STANDARD 14)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)

  # Build everything as position independent code.
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)

  # Location of the project's own Find*/helper modules.
  set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

  # Default CMP0048 to NEW for projects that do not set it themselves.
  set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
  # Custom static-archive rules for every platform except MSVC, Windows and
  # Apple. NOTE(review): the `D` modifier / `-D` switch presumably selects the
  # deterministic mode of ar/ranlib (reproducible archives) -- confirm against
  # the toolchain documentation.
  if(NOT (MSVC OR APPLE OR WIN32))
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
  endif()
  # Standard CMake helper modules used throughout this file.
  include(GNUInstallDirs)
  include(CheckIPOSupported)
  include(CheckCXXCompilerFlag)

  # Record whether the C++ compiler understands -Wclass-memaccess.
  check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)
  # Target architecture selector. AUTO is resolved further below from the
  # toolchain settings or from CMAKE_SYSTEM_PROCESSOR.
  set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
  # Values offered by cmake-gui / ccmake for MGE_ARCH.
  set_property(CACHE MGE_ARCH PROPERTY STRINGS
    AUTO x86_64 i386 armv7 aarch64 naive fallback)

  # Name of the export set used for the MegEngine targets.
  set(MGE_EXPORT_TARGETS MegEngine-targets)
  # ---- Build-shape switches ------------------------------------------------
  option(BUILD_SHARED_LIBS "Build shared libraries" ON)
  option(MGE_BUILD_SDK "Build load_and_run" ON)
  option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
  option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
  option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
  option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
  option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
  option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
  option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)

  # ---- Language / runtime features -----------------------------------------
  option(MGE_ENABLE_RTTI "Build with RTTI" ON)
  option(MGE_ENABLE_LOGGING "Build with logging" ON)
  option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
  option(MGE_DEBUG_UTIL "Enable debug utility" ON)
  option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
  option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)

  # ---- Binary-size reduction -----------------------------------------------
  option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
  option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)

  # ---- JIT back ends -------------------------------------------------------
  option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
  option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
  option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)

  # ---- CUDA and related libraries ------------------------------------------
  option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
  option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
  option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
  option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
  option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
  option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
  option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)

  # ---- Other accelerators and third-party components -----------------------
  option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
  option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
  option(MGE_WITH_ROCM "Enable ROCM support" OFF)
  option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
  option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
  # Windows builds always link cuDNN statically, whatever the option says.
  if(WIN32 OR MSVC)
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
  endif()
  # MGE_WITH_ANY_CUDA_STUB is ON as soon as either the CUDA or the NVRTC stub
  # is requested, and OFF otherwise.
  set(MGE_WITH_ANY_CUDA_STUB OFF)
  if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
  endif()
  # Binary-reduce mode: MGE_BIN_REDUCE names a header that is force-included
  # (-include) into every C and C++ translation unit.
  #
  # The expansion is quoted on purpose: when MGE_BIN_REDUCE is not set, the
  # unquoted form collapses to `if(NOT STREQUAL "")`, which only evaluates to
  # false by accident of how if() parses its arguments.
  if(NOT "${MGE_BIN_REDUCE}" STREQUAL "")
    message(STATUS "build with BIN REDUCE")
    # Minimum-size builds additionally drop RTTI, logging and exceptions and
    # switch to an inference-only library.
    if(MGE_WITH_MINIMUM_SIZE)
      set(MGE_ENABLE_RTTI OFF)
      set(MGE_ENABLE_LOGGING OFF)
      set(MGE_ENABLE_EXCEPTIONS OFF)
      set(MGE_INFERENCE_ONLY ON)
    endif()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${MGE_BIN_REDUCE}")
  endif()
  # Place functions and data in their own sections and let the linker drop the
  # unused ones, when the toolchain accepts the flags.
  # The probe is skipped on APPLE because running it there crashes cmake.
  if(NOT APPLE)
    check_cxx_compiler_flag(
      "-ffunction-sections -fdata-sections -Wl,--gc-sections"
      CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
      string(APPEND CMAKE_CXX_FLAGS " -ffunction-sections -fdata-sections")
      string(APPEND CMAKE_C_FLAGS " -ffunction-sections -fdata-sections")
      string(APPEND CMAKE_EXE_LINKER_FLAGS " -Wl,--gc-sections")
      string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--gc-sections")
    endif()
  endif()
  # Probe link-time-optimization support; when available, pass -flto=full to
  # the linker for executables and shared libraries.
  # NOTE(review): the flag is only added to the linker flags here, not to the
  # compile flags -- confirm that this is intended.
  check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
  if(NOT IS_LTO_SUPPORT)
    message(STATUS "lto is not supported in this compiler")
  else()
    message(STATUS "lto is supported in this compiler")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
  endif()
  # Midout profiling build: define MIDOUT_PROFILING for both C and C++.
  if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE")
    string(APPEND CMAKE_CXX_FLAGS " -DMIDOUT_PROFILING")
    string(APPEND CMAKE_C_FLAGS " -DMIDOUT_PROFILING")
  endif()
  # Apple builds are always static (required for the xcode framework).
  if(APPLE)
    set(BUILD_SHARED_LIBS OFF)
    message(STATUS "build static for xcode framework require")
  endif()
  # Using system libraries turns off static CUDA linking.
  if(MGE_USE_SYSTEM_LIB)
    set(MGE_CUDA_USE_STATIC OFF)
  endif()

  # FlatBuffers support switches on FBS serialization.
  if(MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
  endif()
  # Cross compiling (a toolchain file is in use): point flatc at the host build
  # and derive MGE_ARCH from the toolchain-specific variables.
  #
  # All expansions inside if() are quoted so that an unset ANDROID_ARCH or
  # IOS_ARCH reaches the intended FATAL_ERROR branch instead of aborting with
  # an if() argument error.
  if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    if(ANDROID_TOOLCHAIN_ROOT)
      # ANDROID_ARCH_NAME, when provided, overrides ANDROID_ARCH.
      if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
        set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
      endif()
      if("${ANDROID_ARCH}" STREQUAL "arm")
        set(MGE_ARCH "armv7")
      elseif("${ANDROID_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
      endif()
    elseif(IOS_TOOLCHAIN_ROOT)
      # armv7 / armv7k / armv7s map to armv7; arm64 / arm64e map to aarch64.
      if("${IOS_ARCH}" STREQUAL "armv7")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      elseif("${IOS_ARCH}" STREQUAL "armv7k")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" STREQUAL "arm64e")
        set(MGE_ARCH "aarch64")
      elseif("${IOS_ARCH}" STREQUAL "armv7s")
        set(MGE_ARCH "armv7")
      else()
        message(FATAL_ERROR "Unsupported IOS_ARCH.")
      endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
      set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
      set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
      message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
  endif()
  # Resolve MGE_ARCH=AUTO from the processor CMake reports for the target.
  # Expansions are quoted so an empty CMAKE_SYSTEM_PROCESSOR ends in the
  # FATAL_ERROR below rather than in an if() argument error.
  if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
      set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
      set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
      # Any other processor name starting with "arm" is treated as armv7.
      set(MGE_ARCH "armv7")
    else()
      message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
  endif()
  # Default to RelWithDebInfo when neither a build type nor a list of
  # configuration types has been provided.
  if(NOT (CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE))
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
  endif()
  # Release builds without tests on non-x86_64 targets drop RTTI.
  # CMAKE_BUILD_TYPE is empty when CMAKE_CONFIGURATION_TYPES is set (the
  # defaulting block above is skipped), so the expansions are quoted to keep
  # this condition well-formed.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND NOT MGE_WITH_TEST AND NOT "${MGE_ARCH}" STREQUAL "x86_64")
    set(MGE_ENABLE_RTTI OFF)
    message(STATUS "disable MGE_ENABLE_RTTI when Release/NON-x86_64 mode!!")
  endif()
  174. if(MSVC OR WIN32)
  # for cmake after 3.15.2
  # CMP0091 NEW makes CMAKE_MSVC_RUNTIME_LIBRARY the runtime selector.
  cmake_policy(SET CMP0091 NEW)
  set(CMAKE_OBJECT_PATH_MAX 300)
  # Static MSVC runtime: the debug flavour for Debug builds, release otherwise.
  # The expansion is quoted because CMAKE_BUILD_TYPE may be empty.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
  else()
    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
  endif()
  # Definitions applied to every target of a Windows build.
  add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
  message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
  # Only the clang / clang-cl front ends are accepted on Windows.
  if(NOT (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang-cl"))
    message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
  endif()
  # on windows need append VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
  # and VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to PATH env
  #
  # Selects the base optimisation flags (WIN_FLAGS) and, for ASAN builds,
  # locates the LLVM AddressSanitizer import libraries under $ENV{VS_PATH} and
  # links them into every target.
  if(MGE_BUILD_WITH_ASAN)
    message(WARNING "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!")
    # Quoted: CMAKE_BUILD_TYPE may be empty.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      message(WARNING "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug")
      message(FATAL_ERROR "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\"")
    endif()
    if("$ENV{VS_PATH}" STREQUAL "")
      message(FATAL_ERROR "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
    endif()
    # Library names and search suffix per architecture. Both thunk names carry
    # the explicit .lib suffix so the two branches are consistent.
    if("${MGE_ARCH}" STREQUAL "x86_64")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-x86_64.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    elseif("${MGE_ARCH}" STREQUAL "i386")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    else()
      message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
    endif()
    find_path(ASAN_DLL_PATH
      NAMES ${WINDOWS_ASAN_DLL_NAME}
      HINTS $ENV{VS_PATH}
      PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
      DOC "Windows asan library path" )
    # find_path() stores <VAR>-NOTFOUND on failure, which if() treats as false.
    if(NOT ASAN_DLL_PATH)
      message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
    endif()
    message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
    link_directories(${ASAN_DLL_PATH})
    link_libraries(${WINDOWS_ASAN_DLL_NAME})
    link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
    set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
    # windows Llvm asan do not take effect when /O2
    # RELWITHDEBINFO default value is /O2, so override it
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
  else()
    set(WIN_FLAGS "/O2")
  endif()
  # Extend WIN_FLAGS (seeded just above with the optimisation level) with the
  # remaining Windows compile flags, then add the result to the C and C++ flags.
  # Each string(APPEND) input starts with a space, so the final string is the
  # same space-separated sequence as a chain of set() calls would produce.
  #
  # If your CPU is from the cascadelake series you can enable, for performance:
  #   set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
  #   set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")
  string(APPEND WIN_FLAGS
    # enable sse instruction optimize for X86, enable avx header to compile avx code
    " -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__"
    # for windows build: keep these diagnostics as warnings, not errors
    " -Wno-error=implicit-int-conversion -Wno-error=double-promotion"
    " -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion"
    " -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default"
    " -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break"
    " /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819"
    " /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj"
    " /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")
  string(APPEND CMAKE_C_FLAGS " ${WIN_FLAGS}")
  string(APPEND CMAKE_CXX_FLAGS " ${WIN_FLAGS}")
  # FIXME: fix halide JIT on windows
  # Until then every JIT back end is switched off for Windows host builds.
  message(STATUS "disable jit, halide and mlir on windows host build...")
  set(MGE_WITH_JIT OFF)
  set(MGE_WITH_JIT_MLIR OFF)
  set(MGE_WITH_HALIDE OFF)

  # FIXME: fix MegRay on windows
  # Distributed support is switched off for the same reason.
  message(STATUS "Disable distributed build on windows host build...")
  set(MGE_WITH_DISTRIBUTED OFF)
  255. else()
  string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra")

  # Non-Windows Debug flags: -O0 -g, plus the sanitizer flags for ASAN builds.
  set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
  set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
  if(MGE_BUILD_WITH_ASAN)
    string(APPEND CMAKE_C_FLAGS_DEBUG " -fsanitize=address -fno-omit-frame-pointer")
    string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fsanitize=address -fno-omit-frame-pointer")
  endif()

  # Non-Windows optimised flags, shared by Release and RelWithDebInfo:
  # ASAN builds stay at -O0, Android uses -Ofast, everything else -O3.
  if(MGE_BUILD_WITH_ASAN)
    set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
  elseif(ANDROID)
    set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
  else()
    set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
  endif()
  foreach(mge_opt_config RELEASE RELWITHDEBINFO)
    set(CMAKE_C_FLAGS_${mge_opt_config} "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_${mge_opt_config} "${OPTIMIZE_LEVEL}")
  endforeach()

  # Some gnu (gcc) compilers have runtime issues with -static-libasan, and
  # clang's linker becomes very slow with -static-libsan on big targets, so the
  # dynamic asan runtime is used by default.
  # The Android asan.so depends on log, so `log` is linked into every target
  # that MegEngine depends on as well (for example flatc).
  if(ANDROID AND MGE_BUILD_WITH_ASAN)
    link_libraries(log)
  endif()
  285. endif()
  # Locate cuDNN. Statically linking cuDNN 8 or newer turns on
  # MGE_WITH_LARGE_ARCHIVE.
  if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND NOT MGE_WITH_CUDNN_SHARED
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
      message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
      set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
  endif()
  # Choose the common linker flag: the large code model for large-archive
  # builds, otherwise the gold linker where it is supported. Gold is never used
  # on Android, Apple or Windows.
  check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
  if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
  elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    # MGE_WITH_LARGE_ARCHIVE is already known to be false in this branch.
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
  endif()
  # The same flag goes to shared libraries, modules and executables.
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
  string(APPEND CMAKE_MODULE_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
  string(APPEND CMAKE_EXE_LINKER_FLAGS " ${MGE_COMMON_LINKER_FLAGS}")
  # Halide and MLIR JIT need the base JIT; switch them off, with a warning,
  # when JIT is disabled.
  if(MGE_WITH_HALIDE AND NOT MGE_WITH_JIT)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(MGE_WITH_JIT_MLIR AND NOT MGE_WITH_JIT)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()
  # FIXME: the LLVM that Halide depends on currently conflicts with the LLVM
  # that MLIR depends on; this should be fixed in subsequent versions.
  # Until then Halide cannot be combined with the imperative runtime or with
  # the MLIR JIT.
  if(MGE_WITH_HALIDE AND MGE_BUILD_IMPERATIVE_RT)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
  endif()
  if(MGE_WITH_HALIDE AND MGE_WITH_JIT_MLIR)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
  endif()
  # Enable the CUDA language (C++14, required).
  if(MGE_WITH_CUDA)
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    # When cross compiling, the CUDA host compiler is forced to the C++ compiler.
    if(CMAKE_TOOLCHAIN_FILE)
      set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
      message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

    include(CheckLanguage)
    check_language(CUDA)
    if(NOT CMAKE_CUDA_COMPILER)
      if(CMAKE_TOOLCHAIN_FILE)
        # Cross compiling: fall back to plain "nvcc".
        # remove this after CMAKE fix nvcc sbsa
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
      else()
        message(FATAL_ERROR "CUDA compiler not found in PATH")
      endif()
    endif()

    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  endif()
  # Without CUDA: JIT is kept only on x86 hosts, Halide and TensorRT are
  # always switched off.
  if(NOT MGE_WITH_CUDA)
    if(NOT (MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386"))
      message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
      set(MGE_WITH_JIT OFF)
      set(MGE_WITH_JIT_MLIR OFF)
    endif()
    set(MGE_WITH_HALIDE OFF)
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
  endif()
  find_package(PythonInterp 3 REQUIRED)

  # PYTHON3_EXECUTABLE_WITHOUT_VERSION is meant for targets that do not depend
  # on the python API, so their outputs can be reused when the python3 version
  # is switched. It is the bare name "python3" when that program is found, and
  # falls back to PYTHON_EXECUTABLE otherwise.
  set(PYTHON3_IN_ENV "python3")
  find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  if(NOT PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
  else()
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  endif()
  # Find the thread library, preferring the -pthread flag.
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads)
  # With CUDA enabled, -pthread has to be forwarded to the host compiler via
  # -Xcompiler for CUDA sources; every other language receives it directly.
  # A value equal to "-pthread" is necessarily non-empty, so no separate
  # emptiness check is needed.
  if(MGE_WITH_CUDA AND "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread")
    set_property(TARGET Threads::Threads
      PROPERTY INTERFACE_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
  endif()
  # BLAS back end selector; MKL and OpenBLAS are the offered values.
  set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
  set_property(CACHE MGE_BLAS PROPERTY STRINGS
    MKL
    OpenBLAS)

  # Optional user override for the CUDA -gencode flags (empty by default).
  set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
  # Default the CUDA host compiler to the C++ compiler when nothing set it.
  # `${...}` is required here: `$(CMAKE_CXX_COMPILER)` is make syntax, which
  # CMake does not expand and would store as a literal string.
  if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
  endif()
  # Translate the RTTI / exception switches into C++ compiler flags.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
  endif()
  # Pull in googletest for test builds.
  if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
  endif()

  # Building the imperative runtime raises the C++ standard from 14 to 17.
  if(MGE_BUILD_IMPERATIVE_RT)
    set(CMAKE_CXX_STANDARD 17)
  endif()
  # Distributed support needs at least one of CUDA / ROCm.
  if(NOT (MGE_WITH_CUDA OR MGE_WITH_ROCM))
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
  endif()

  # Inference-only builds carry neither distributed support nor the
  # imperative_rt python module.
  if(MGE_INFERENCE_ONLY)
    message(STATUS "Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)
    message(STATUS "Disable imperative_rt python module for inference only build.")
    set(MGE_BUILD_IMPERATIVE_RT OFF)
  endif()
  # Third-party projects, each pulled in only by the features that need it.
  # llvm-project: MLIR JIT or the imperative runtime.
  if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    include(cmake/llvm-project.cmake)
  endif()

  # protobuf + zmq: distributed support.
  if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
  endif()

  # flatbuffers: FlatBuffers serialization.
  if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
  endif()
  415. if(MGE_WITH_CUDA)
  # Make the CUDA toolkit headers visible to every target.
  include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

  # Collect the CUDA implicit link directories, leaving out any directory whose
  # last path component is "stubs", and add them to the link search path.
  foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
    get_filename_component(_NAME ${path} NAME)
    if(${_NAME} STREQUAL "stubs")
      # stub directory: deliberately not added
    else()
      list(APPEND CUDA_LINK_DIRECTORIES ${path})
    endif()
  endforeach()
  link_directories(${CUDA_LINK_DIRECTORIES})
  # Per-configuration CUDA optimisation flags.
  set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
  set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
  set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")

  # Common CUDA flags. Both branches append to CMAKE_CUDA_FLAGS so that flags
  # supplied by the user or the toolchain are kept on every platform.
  if(MSVC OR WIN32)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
    # Host-compiler options forwarded through nvcc's --compiler-options.
    set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
    # Quoted: CMAKE_BUILD_TYPE may be empty.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
    endif()
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
  else()
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
  endif()
  # Forward the RTTI / exception switches to the CUDA host compiler.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-exceptions")
  endif()
  # Choose the -gencode flags.
  #  * A user-provided MGE_CUDA_GENCODE is used as is (MEGDNN_THREADS_512 = 1).
  #  * Otherwise (MEGDNN_THREADS_512 = 0) a default set is derived from the
  #    CUDA compiler version: one "sm_XX" entry per architecture in
  #    mge_gencode_sms plus, except for static cuDNN 8, one trailing
  #    "compute_XX" entry for the newest architecture.
  if(MGE_CUDA_GENCODE)
    set(MEGDNN_THREADS_512 1)
  elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "aarch64")
    set(MEGDNN_THREADS_512 0)
    set(mge_gencode_ptx "")
    if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
      message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
      set(mge_gencode_sms 61 70 75)
    elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.1.0")
      set(mge_gencode_sms 61 70 75 80 86)
      set(mge_gencode_ptx 86)
    elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0")
      set(mge_gencode_sms 61 70 75 80)
      set(mge_gencode_ptx 80)
    elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0")
      set(mge_gencode_sms 52 60 61 70 75)
      set(mge_gencode_ptx 75)
    elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "9.0.0")
      set(mge_gencode_sms 52 60 61 70)
      set(mge_gencode_ptx 70)
    else()
      set(mge_gencode_sms 35 52 60 61)
      set(mge_gencode_ptx 61)
    endif()
    # Every entry is appended with a leading space, in list order.
    foreach(mge_sm IN LISTS mge_gencode_sms)
      string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${mge_sm},code=sm_${mge_sm}")
    endforeach()
    if(mge_gencode_ptx)
      string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${mge_gencode_ptx},code=compute_${mge_gencode_ptx}")
    endif()
    unset(mge_gencode_sms)
    unset(mge_gencode_ptx)
  else()
    message(FATAL_ERROR "Unsupported CUDA host arch.")
  endif()
  string(APPEND CMAKE_CUDA_FLAGS " ${MGE_CUDA_GENCODE}")
  # Locate TensorRT when it is enabled.
  if(MGE_WITH_TRT)
    include(cmake/tensorrt.cmake)
  endif()

  # Assemble MGE_CUDA_LIBS. The order of the list(APPEND) calls is the link
  # order, so it must not be changed.
  if(NOT MGE_CUDA_USE_STATIC)
    # ---- dynamic CUDA libraries ----
    if(MGE_WITH_TRT)
      list(APPEND MGE_CUDA_LIBS libnvinfer)
      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin)
      endif()
    endif()
    list(APPEND MGE_CUDA_LIBS libcudnn)
    # NOTE(review): cusolver / cublas / curand are only added for CUDA >= 10.1
    # in this branch -- confirm that older toolkits do not need them.
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
      list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
    endif()
    list(APPEND MGE_CUDA_LIBS cudart)
  else()
    # ---- static CUDA libraries ----
    # TensorRT (with the myelin libraries from TensorRT 7 on).
    if(MGE_WITH_TRT)
      if(MSVC OR WIN32)
        message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
        list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
      else()
        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
      endif()
      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
      endif()
    endif()

    # cuDNN: Windows always links CUDNN_LIBRARY; elsewhere cuDNN 7.5.0 needs
    # the --whole-archive workaround, other versions link libcudnn directly.
    if(MSVC OR WIN32)
      message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
      list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
    elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
      message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
    else()
      list(APPEND MGE_CUDA_LIBS libcudnn)
    endif()

    # cusolver / curand / cudart / cusparse followed by cublas.
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
      list(APPEND MGE_CUDA_LIBS cublas.lib)
    else()
      list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
      if(MGE_WITH_CUBLAS_SHARED)
        list(APPEND MGE_CUDA_LIBS cublas)
      else()
        list(APPEND MGE_CUDA_LIBS cublas_static)
      endif()
    endif()

    # cublasLt is added from CUDA 10.1 on.
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS cublasLt.lib)
      elseif(MGE_WITH_CUBLAS_SHARED)
        list(APPEND MGE_CUDA_LIBS cublasLt)
      else()
        list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
      endif()
    endif()

    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0" AND NOT (MSVC OR WIN32))
      # mark all symbols from liblapack_static.a as weak to avoid
      # duplicated definition with mkl
      find_library(LAPACK_STATIC_PATH lapack_static
        HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
      if(NOT LAPACK_STATIC_PATH)
        message(FATAL_ERROR "liblapack_static.a not found")
      endif()
      set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)
      # Build rule that runs objcopy to produce the weakened copy ...
      add_custom_command(
        OUTPUT ${LAPACK_STATIC_COPY_PATH}
        COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
        VERBATIM)
      # ... driven by a custom target ...
      add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})
      # ... and exposed as the imported library "lapack_static_weak".
      add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
      add_dependencies(lapack_static_weak lapack_static_weak_target)
      set_target_properties(lapack_static_weak
        PROPERTIES IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
      list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
    endif()
  endif()
  # Driver-level libraries: the real cuda / nvrtc libraries are linked only
  # when the corresponding stub is not in use.
  if(MSVC OR WIN32)
    if(NOT MGE_WITH_CUDA_STUB)
      list(APPEND MGE_CUDA_LIBS cuda.lib)
    endif()
    if(NOT MGE_WITH_NVRTC_STUB)
      list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    endif()
  else()
    if(NOT MGE_WITH_CUDA_STUB)
      list(APPEND MGE_CUDA_LIBS cuda)
    endif()
    if(NOT MGE_WITH_NVRTC_STUB)
      list(APPEND MGE_CUDA_LIBS nvrtc)
    endif()
  endif()

  # Build and link the stub library when any stub is requested.
  if(MGE_WITH_ANY_CUDA_STUB)
    add_subdirectory(dnn/cuda-stub)
    list(APPEND MGE_CUDA_LIBS cuda-stub)
  endif()

  # Last entry: nvrtc.lib on Windows, nvToolsExt elsewhere.
  # NOTE(review): the Windows entry repeats nvrtc.lib -- confirm whether a
  # Windows nvToolsExt library was intended here.
  if(NOT (MSVC OR WIN32))
    list(APPEND MGE_CUDA_LIBS nvToolsExt)
  else()
    list(APPEND MGE_CUDA_LIBS nvrtc.lib)
  endif()
  # System libraries needed by the CUDA runtime.
  # They are appended as separate list items. Rebuilding the list through a
  # quoted string (`"${MGE_CUDA_LIBS} -lrt"`) would glue the flags onto the
  # last element, producing a single item such as "nvToolsExt -lrt -ldl".
  # NOTE(review): -lrt is added on every platform, Windows included, as in the
  # original -- confirm whether it should be limited to UNIX like -ldl.
  list(APPEND MGE_CUDA_LIBS -lrt)
  if(UNIX)
    list(APPEND MGE_CUDA_LIBS -ldl)
  endif()
  614. endif()
  ###########please add_subdirectory from here###############

  # cpuinfo runtime checks are offered on x86_64 / i386 / armv7 / aarch64,
  # except on Apple platforms. The option only exists on those targets.
  if(${MGE_ARCH} MATCHES "^(x86_64|i386|armv7|aarch64)$" AND NOT APPLE)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
      message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
      add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
      include(cmake/cpuinfo.cmake)
    endif()
  endif()
  624. if(MGE_WITH_CAMBRICON)
  625. include_directories("$ENV{NEUWARE_HOME}/include")
  626. link_directories("$ENV{NEUWARE_HOME}/lib64")
  627. include(cmake/FindBANG/FindBANG.cmake)
  628. if (${MGE_MLU_ARCH} STREQUAL "MLU100")
  629. set(BANG_ARCH "100")
  630. elseif (${MGE_MLU_ARCH} STREQUAL "MLU1h8")
  631. set(BANG_ARCH "110")
  632. elseif (${MGE_MLU_ARCH} STREQUAL "MLU220")
  633. set(BANG_ARCH "220")
  634. elseif (${MGE_MLU_ARCH} STREQUAL "MLU270")
  635. set(BANG_ARCH "270")
  636. elseif (${MGE_MLU_ARCH} STREQUAL "MLU290")
  637. set(BANG_ARCH "290")
  638. elseif (${MGE_MLU_ARCH} STREQUAL "MLU200")
  639. set(BANG_ARCH "200")
  640. else()
  641. message (FATAL_ERROR "Unsupported MLU arch.")
  642. endif()
  643. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} --bang-mlu-arch=${MGE_MLU_ARCH}")
  644. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -std=c++11 -Werror")
  645. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__BANG_ARCH__=${BANG_ARCH}")
  646. if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
  647. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O0")
  648. elseif (${CMAKE_BUILD_TYPE} STREQUAL "Release")
  649. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
  650. elseif (${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
  651. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O3")
  652. elseif (${CMAKE_BUILD_TYPE} STREQUAL "MinSizeRel")
  653. set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -Os")
  654. endif()
  655. include(cmake/cnrt.cmake)
  656. include(cmake/cndev.cmake)
  657. include(cmake/cnml.cmake)
  658. list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
  659. set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
  660. endif()
  661. if (MGE_WITH_ROCM)
  662. include(cmake/rocm.cmake)
  663. endif ()
  664. if(MGE_WITH_ATLAS)
  665. add_subdirectory(dnn/atlas-stub)
  666. list(APPEND MGE_ATLAS_LIBS atlas-stub)
  667. set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
  668. set(MGB_ATLAS ${MGE_WITH_ATLAS})
  669. endif()
  670. find_program(CCACHE_BIN ccache)
  671. if(CCACHE_BIN)
  672. set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
  673. if(MGE_WITH_CUDA AND NOT ${CMAKE_VERSION} VERSION_LESS "3.10.0")
  674. message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
  675. set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
  676. endif()
  677. endif()
  678. if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
  679. if(${MGE_BLAS} STREQUAL "MKL")
  680. include(cmake/mkl.cmake)
  681. set(MGE_BLAS_LIBS libmkl)
  682. elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
  683. include(cmake/OpenBLAS.cmake)
  684. set(MGE_BLAS_LIBS libopenblas)
  685. else()
  686. message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
  687. endif()
  688. endif()
  689. # MKLDNN build
  690. if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
  691. include(cmake/MKL_DNN.cmake)
  692. set(MEGDNN_X86_WITH_MKL_DNN 1)
  693. endif()
  694. # RTTI
  695. if(MGE_ENABLE_RTTI)
  696. set(MEGDNN_ENABLE_MANGLING 0)
  697. set(MEGDNN_ENABLE_RTTI 1)
  698. else()
  699. set(MEGDNN_ENABLE_MANGLING 1)
  700. set(MEGDNN_ENABLE_RTTI 0)
  701. endif()
  702. set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
  703. # Logging
  704. set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  705. set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  706. set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})
  707. # Exception
  708. if(NOT MGE_ENABLE_EXCEPTIONS)
  709. message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
  710. endif()
  711. set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
  712. set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
  713. # JIT
  714. if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
  715. set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
  716. include(cmake/Halide.cmake)
  717. endif()
  718. # Thread
  719. IF(APPLE)
  720. set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  721. set(CMAKE_HAVE_THREADS_LIBRARY 1)
  722. set(CMAKE_USE_WIN32_THREADS_INIT 0)
  723. set(CMAKE_USE_PTHREADS_INIT 1)
  724. set(THREADS_PREFER_PTHREAD_FLAG ON)
  725. message(STATUS "disable jit, halide and mlir on macos host build...")
  726. set(MGE_WITH_HALIDE OFF)
  727. set(MGE_WITH_JIT OFF)
  728. set(MGE_WITH_JIT_MLIR OFF)
  729. ENDIF()
  730. set(MGB_JIT ${MGE_WITH_JIT})
  731. set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
  732. set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
  733. # for consumer override MGB_C_OPR_INIT_FUNC symbol interface
  734. if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
  735. add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
  736. message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
  737. endif()
  738. if(MSVC OR WIN32)
  739. set(CMAKE_HAVE_THREADS_LIBRARY 1)
  740. set(CMAKE_USE_WIN32_THREADS_INIT 1)
  741. set(CMAKE_USE_PTHREADS_INIT 1)
  742. set(THREADS_PREFER_PTHREAD_FLAG ON)
  743. endif()
  744. if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT OR ANDROID)
  745. set(MGB_HAVE_THREAD 1)
  746. endif()
  747. if(MGE_WITH_TEST)
  748. # use intra-op multi threads
  749. set(MEGDNN_ENABLE_MULTI_THREADS 1)
  750. endif()
  751. # CUDA
  752. set(MGB_CUDA ${MGE_WITH_CUDA})
  753. set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
  754. #ROCM
  755. set(MGB_ROCM ${MGE_WITH_ROCM})
  756. set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
  757. # CAMBRICON
  758. set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
  759. set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
  760. # ENFLAME
  761. set(MGB_ENFLAME ${MGE_WITH_ENFLAME})
  762. set(MEGDNN_WITH_ENFLAME ${MGE_WITH_ENFLAME})
  763. # Debug info
  764. if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
  765. set(MGB_ASSERT_LOC 1)
  766. set(MGB_ENABLE_DEBUG_UTIL 1)
  767. else()
  768. set(MGB_ASSERT_LOC 0)
  769. set(MGB_ENABLE_DEBUG_UTIL 0)
  770. endif()
  771. # TensorRT
  772. set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})
  773. # Inference only
  774. if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
  775. set(MGB_ENABLE_GRAD 0)
  776. set(MGB_BUILD_SLIM_SERVING 1)
  777. else()
  778. set(MGB_ENABLE_GRAD 1)
  779. set(MGB_BUILD_SLIM_SERVING 0)
  780. endif()
  781. # Distributed communication
  782. set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})
  783. # MGE_ARCH related flags
  784. if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
  785. if(MGE_BLAS STREQUAL "MKL")
  786. set(MEGDNN_X86_WITH_MKL 1)
  787. elseif(MGE_BLAS STREQUAL "OpenBLAS")
  788. set(MEGDNN_X86_WITH_OPENBLAS 1)
  789. endif()
  790. endif()
  791. # Enable Naive
  792. if(MGE_ARCH STREQUAL "naive")
  793. set(MEGDNN_NAIVE 1)
  794. message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
  795. endif()
  796. if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
  797. set(MEGDNN_X86 1)
  798. if(MGE_ARCH STREQUAL "x86_64")
  799. set(MEGDNN_X86_64 1)
  800. set(MEGDNN_64_BIT 1)
  801. if(NOT MSVC)
  802. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
  803. endif()
  804. else()
  805. set(MEGDNN_X86_32 1)
  806. if(NOT MSVC)
  807. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
  808. endif()
  809. endif()
  810. if(NOT MSVC)
  811. set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse")
  812. endif()
  813. endif()
  814. # dotprod is not enable by default on APPLE, cpuinfo has some problem on APPLE
  815. if(NOT APPLE AND ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
  816. CHECK_CXX_COMPILER_FLAG("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
  817. if(CXX_COMPILER_SUPPORT_DOT)
  818. message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
  819. set(MGB_ENABLE_DOT 1)
  820. endif()
  821. endif()
  822. if(MGE_ARCH STREQUAL "armv7")
  823. # -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default.
  824. if(ANDROID)
  825. set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon")
  826. endif()
  827. set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsafe-math-optimizations")
  828. set (MARCH "-march=armv7-a")
  829. set (MEGDNN_ARMV7 1)
  830. endif()
  831. if(MGE_ARCH STREQUAL "aarch64")
  832. set(MEGDNN_AARCH64 1)
  833. set(MEGDNN_64_BIT 1)
  834. set(MARCH "-march=armv8-a")
  835. set(MGB_AARCH64 1)
  836. if(MGE_ARMV8_2_FEATURE_FP16)
  837. message(STATUS "Enable fp16 feature support in armv8.2")
  838. if(NOT ${MGE_DISABLE_FLOAT16})
  839. set(MEGDNN_ENABLE_FP16_NEON 1)
  840. endif()
  841. set(MARCH "-march=armv8.2-a+fp16")
  842. endif()
  843. if(MGE_WITH_CUDA)
  844. message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
  845. when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
  846. for save link time(14min->1min), you may open below flags if not deploy on\
  847. arm a53 platform, or just build release type!")
  848. #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
  849. endif()
  850. endif()
  851. if(MGE_ARCH STREQUAL "riscv64")
  852. set(MEGDNN_RISCV64 1)
  853. set(MEGDNN_64_BIT 1)
  854. endif()
  855. set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
  856. set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")
  857. # Write out megbrain_build_config.h
  858. # It defines macros needed by both megbrain and dnn
  859. configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
  860. install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  861. add_subdirectory(dnn)
  862. list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
  863. set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
  864. set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
  865. file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
  866. add_custom_command(
  867. OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  868. COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  869. DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
  870. VERBATIM
  871. )
  872. list(APPEND MGB_OPR_PARAM_DEFS_OUTS
  873. ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  874. )
  875. install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
  876. list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
  877. add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
  878. add_library(mgb_opr_param_defs INTERFACE)
  879. target_include_directories(mgb_opr_param_defs
  880. INTERFACE
  881. $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
  882. $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
  883. )
  884. add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
  885. install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
  886. if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
  887. # generate param_defs.td
  888. set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
  889. set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
  890. set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
  891. set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
  892. set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)
  893. file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
  894. file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
  895. file(APPEND ${OPR_PARAM_DEFS_SRCS} ${CONTENTS})
  896. file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
  897. add_custom_command(
  898. OUTPUT ${OPR_PARAM_DEFS_OUT}
  899. COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
  900. DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py ${OPR_PARAM_DEFS_SCRIPT}
  901. VERBATIM
  902. )
  903. # mlir tblgen sources
  904. set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
  905. set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
  906. list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
  907. file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
  908. add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
  909. endif()
  910. if(MGE_WITH_DISTRIBUTED)
  911. set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
  912. set(MEGRAY_WITH_SHM ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
  913. set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM} CACHE BOOL "Override MegRay option" FORCE)
  914. add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
  915. endif()
  916. add_subdirectory(src)
  917. if(MGE_BUILD_SDK)
  918. add_subdirectory(sdk/load-and-run)
  919. endif()
  920. if(MGE_BUILD_IMPERATIVE_RT)
  921. add_subdirectory(imperative)
  922. message(STATUS "Enable imperative python wrapper runtime")
  923. endif()
  924. if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
  925. add_subdirectory(test)
  926. endif()
  927. if(TARGET _imperative_rt)
  928. add_custom_target(
  929. develop
  930. COMMAND ${CMAKE_COMMAND} -E create_symlink
  931. ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
  932. ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
  933. COMMAND ${CMAKE_COMMAND} -E create_symlink
  934. ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
  935. ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
  936. DEPENDS _imperative_rt
  937. VERBATIM
  938. )
  939. endif()
  940. # Configure and install pkg-config.
  941. # Note that unlike the Config.cmake modules, this is not relocatable (and not
  942. # really portable) because we have two dependencies without pkg-config
  943. # descriptions: FlatBuffers and MKL-DNN
  944. if (MGE_USE_SYSTEM_MKLDNN)
  945. set (MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
  946. endif()
  947. if (MGE_USE_SYSTEM_OPENBLAS)
  948. set (MGE_PKGCONFIG_LIBS_PRIVATE "${MGE_PKGCONFIG_LIBS_PRIVATE} -lopenblas")
  949. endif()
  950. configure_file(cmake/megengine.pc.in
  951. ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
  952. @ONLY)
  953. install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
  954. DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
  955. # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
  956. if (NOT MGE_WITH_DISTRIBUTED)
  957. include(CMakePackageConfigHelpers)
  958. set (MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)
  959. configure_package_config_file(cmake/MegEngineConfig.cmake.in
  960. ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
  961. INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
  962. )
  963. write_basic_package_version_file(
  964. ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
  965. VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
  966. COMPATIBILITY SameMajorVersion)
  967. install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
  968. install(FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
  969. ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
  970. DESTINATION ${MGE_INSTALL_CMAKEDIR})
  971. endif()
  972. if(MSVC OR WIN32)
  973. add_compile_options(
  974. $<$<CONFIG:>:/MT>
  975. $<$<CONFIG:Debug>:/MTd>
  976. $<$<CONFIG:Release>:/MT>
  977. )
  978. foreach (CompilerFlag
  979. CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
  980. CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
  981. CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
  982. CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
  983. if(${CompilerFlag} MATCHES "/MD")
  984. string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
  985. set(${CompilerFlag} "${${CompilerFlag}}" CACHE STRING "msvc compiler flags" FORCE)
  986. message(VERBOSE "MSVC flags: ${CompilerFlag}:${${CompilerFlag}}")
  987. endif()
  988. endforeach()
  989. endif()
  990. if(MGE_WITH_JIT_MLIR)
  991. add_subdirectory(tools/mlir/mgb-opt)
  992. add_subdirectory(tools/mlir/mgb-file-check)
  993. endif()
  994. if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
  995. message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
  996. message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
  997. message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
  998. endif()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境，不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序，请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉，欢迎访问 MegStudio 平台。