You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

CMakeLists.txt 42 kB

5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021
  # Establish the minimum CMake version (and with it the policy defaults).
  cmake_minimum_required(VERSION 3.15.2)

  # Ninja is the recommended generator; any other generator only gets a warning.
  message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}" )
  # The expansion is quoted so the generator name is always compared as a plain
  # string and can never be re-interpreted by if() as a variable name.
  if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
  endif()

  # Included before project() because project() consumes MGB_VER_STRING
  # (presumably defined by this module -- confirm in cmake/FetchMegBrainVersion.cmake).
  include(cmake/FetchMegBrainVersion.cmake)
  project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})

  # Project-wide C++ defaults: C++14 without compiler extensions, PIC everywhere.
  set(CMAKE_CXX_STANDARD 14)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
  set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)

  # Custom static-archive rules for non-MSVC / non-Apple / non-Windows builds.
  # The "D" modifier requests deterministic archives from GNU ar / ranlib.
  if(NOT MSVC AND NOT APPLE AND NOT WIN32)
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
  endif()

  include(GNUInstallDirs)
  include(CheckCXXCompilerFlag)
  # Records in CXX_SUPPORT_WCLASS_MEMACCESS whether -Wclass-memaccess is accepted.
  check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)
  # ---------------------------------------------------------------------------
  # User-facing configuration: target architecture, export set name and the
  # boolean feature switches. Defaults are the last argument of each option().
  # ---------------------------------------------------------------------------

  # Target architecture; AUTO is resolved further down in this file.
  set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
  set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO x86_64 i386 armv7 aarch64 naive fallback)

  set(MGE_EXPORT_TARGETS MegEngine-targets)

  # JIT back ends and profiling / size-reduction switches.
  option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
  option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
  option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)
  option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
  option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)

  # Floating-point feature switches.
  option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
  option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)

  # CUDA and related NVIDIA libraries.
  option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
  option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
  option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
  option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
  option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
  option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
  option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)

  # Miscellaneous dependencies and other accelerator back ends.
  option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
  option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
  option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
  option(BUILD_SHARED_LIBS "Build shared libraries" ON)
  option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)

  # Language/runtime features.
  option(MGE_ENABLE_RTTI "Build with RTTI" ON)
  option(MGE_ENABLE_LOGGING "Build with logging" ON)
  option(MGE_DEBUG_UTIL "Enable debug utility" ON)
  option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)

  # Which products to build.
  option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
  option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
  option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
  option(MGE_BUILD_SDK "Build load_and_run" ON)
  option(MGE_INFERENCE_ONLY "Build inference only library." OFF)

  # Remaining back ends and link-size handling.
  option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
  option(MGE_WITH_ROCM "Enable ROCM support" OFF)
  option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
  # ---------------------------------------------------------------------------
  # Derive secondary settings from the options declared above.
  # ---------------------------------------------------------------------------

  # Windows builds always use the static cuDNN library.
  if(MSVC OR WIN32)
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
  endif()

  # MGE_WITH_ANY_CUDA_STUB is ON as soon as either stub option is enabled.
  if(MGE_WITH_NVRTC_STUB OR MGE_WITH_CUDA_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
  else()
    set(MGE_WITH_ANY_CUDA_STUB OFF)
  endif()

  # Binary-size reduction: MGE_BIN_REDUCE names a header that is force-included
  # into every C/C++ translation unit, and the link uses full LTO.
  # The expansion must be quoted: MGE_BIN_REDUCE is normally unset, and an
  # unquoted empty expansion would leave if() with the malformed argument list
  # "NOT STREQUAL <empty>".
  if(NOT "${MGE_BIN_REDUCE}" STREQUAL "")
    message(STATUS "build with BIN REDUCE")
    if(MGE_WITH_MINIMUM_SIZE)
      set(MGE_ENABLE_RTTI OFF)
      set(MGE_ENABLE_LOGGING OFF)
      set(MGE_ENABLE_EXCEPTIONS OFF)
      set(MGE_INFERENCE_ONLY ON)
    endif()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full")
  endif()

  # Midout profiling defines MIDOUT_PROFILING for both C and C++ sources.
  if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMIDOUT_PROFILING")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMIDOUT_PROFILING")
  endif()

  # Apple builds are forced to static libraries.
  if(APPLE)
    set(BUILD_SHARED_LIBS OFF)
    message(STATUS "build static for xcode framework require")
  endif()

  # Using system libraries turns off static CUDA linking.
  if(MGE_USE_SYSTEM_LIB)
    set(MGE_CUDA_USE_STATIC OFF)
  endif()

  if(MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
  endif()
  # ---------------------------------------------------------------------------
  # Cross compilation: a toolchain file is taken as the signal that we are
  # cross compiling. A host-built flatc is expected under build_dir/host_flatc,
  # and MGE_ARCH is derived from the toolchain-specific variables.
  # ---------------------------------------------------------------------------
  if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")

    if(ANDROID_TOOLCHAIN_ROOT)
      # ANDROID_ARCH_NAME, when provided, takes precedence over ANDROID_ARCH.
      if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
        set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
      endif()
      # Expansions are quoted so that an unset ANDROID_ARCH falls through to the
      # FATAL_ERROR below instead of producing a malformed if() expression.
      if("${ANDROID_ARCH}" STREQUAL "arm")
        set(MGE_ARCH "armv7")
      elseif("${ANDROID_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
      endif()
    elseif(IOS_TOOLCHAIN_ROOT)
      # All 32-bit ARM flavours map to armv7, all 64-bit flavours to aarch64.
      # Quoted for the same reason as ANDROID_ARCH above.
      if("${IOS_ARCH}" STREQUAL "armv7"
         OR "${IOS_ARCH}" STREQUAL "armv7k"
         OR "${IOS_ARCH}" STREQUAL "armv7s")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" STREQUAL "arm64" OR "${IOS_ARCH}" STREQUAL "arm64e")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "Unsupported IOS_ARCH.")
      endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
      set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
      set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
      message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()

    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
  endif()
  # Resolve MGE_ARCH=AUTO from the processor CMake reports for the target.
  # Every expansion is quoted: CMAKE_SYSTEM_PROCESSOR can be empty on some
  # setups, and an unquoted empty expansion makes if() fail with an argument
  # error instead of reaching the FATAL_ERROR branch.
  if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
      set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
      set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
      set(MGE_ARCH "armv7")
    else()
      message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
  endif()

  # cpuinfo is offered only for x86_64 / i386 / armv7 / aarch64 on non-Apple
  # platforms; the option is not even declared elsewhere.
  if(("${MGE_ARCH}" STREQUAL "x86_64"
      OR "${MGE_ARCH}" STREQUAL "i386"
      OR "${MGE_ARCH}" STREQUAL "armv7"
      OR "${MGE_ARCH}" STREQUAL "aarch64")
     AND NOT APPLE)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
      message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
      add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
      include(cmake/cpuinfo.cmake)
    endif()
  endif()
  # ---------------------------------------------------------------------------
  # Platform-specific compiler flags.
  #   * Windows: clang / clang-cl only, static MSVC runtime, JIT and distributed
  #     support are switched off.
  #   * Everything else: -Wall -Wextra plus per-configuration optimisation flags.
  # ---------------------------------------------------------------------------
  if(MSVC OR WIN32)
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
    set(CMAKE_OBJECT_PATH_MAX 300)
    # CMAKE_BUILD_TYPE may still be empty here (its default is only assigned
    # later in this file), so the expansion must be quoted; unquoted, an empty
    # value turns this into the invalid expression `if(STREQUAL "Debug")`.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
    message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang-cl")
      message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()

    # Enable SSE instruction optimisation for x86 and define the AVX/FMA macros
    # so that the AVX code paths compile.
    set(WIN_FLAGS "-msse4.2 -O2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
    # If your CPU is from the Cascade Lake series you can enable the following
    # for better performance:
    #   set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    #   set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # Warning downgrades and MSVC-style switches needed for the Windows build.
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break")
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819")
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")

    # FIXME: fix halide JIT on windows
    message(STATUS "disable jit, halide and mlir on windows host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)

    # FIXME: fix MegRay on windows
    message(STATUS "Disable distributed build on windows host build...")
    set(MGE_WITH_DISTRIBUTED OFF)
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    # Android release builds use -Ofast, all other platforms -O3.
    if(ANDROID)
      set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG")
      set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -DNDEBUG -g")
    else()
      set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
      set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -DNDEBUG -g")
    endif()
  endif()
  # Locate cuDNN early: statically linking cuDNN >= 8 forces the large-archive
  # link mode, which in turn decides the linker flags chosen below.
  if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND NOT MGE_WITH_CUDNN_SHARED
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
      message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
      set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
  endif()

  # Pick the common linker flag: -mcmodel=large for large archives, otherwise
  # gold when the compiler accepts -fuse-ld=gold on a supported platform.
  check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
  if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
  elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    # The large-archive case was already handled by the branch above.
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
  endif()

  # Apply the chosen flag to shared libraries, modules and executables alike.
  foreach(link_kind SHARED MODULE EXE)
    set(CMAKE_${link_kind}_LINKER_FLAGS "${CMAKE_${link_kind}_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
  endforeach()
  # Halide and MLIR are JIT back ends: with JIT disabled they are switched off
  # (with a warning, if the user had asked for them).
  if(NOT MGE_WITH_JIT AND MGE_WITH_HALIDE)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(NOT MGE_WITH_JIT AND MGE_WITH_JIT_MLIR)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # FIXME: at present the LLVM that Halide depends on conflicts with the LLVM
  # that MLIR depends on. Should be fixed in subsequent versions. Until then
  # Halide cannot be combined with the imperative runtime or with the MLIR JIT.
  if(MGE_WITH_HALIDE AND MGE_BUILD_IMPERATIVE_RT)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
  endif()
  if(MGE_WITH_HALIDE AND MGE_WITH_JIT_MLIR)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
  endif()
  # Enable the CUDA language when requested; otherwise switch off the features
  # that cannot work without CUDA.
  if(MGE_WITH_CUDA)
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    if(CMAKE_TOOLCHAIN_FILE)
      set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
      message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

    include(CheckLanguage)
    check_language(CUDA)
    if(NOT CMAKE_CUDA_COMPILER)
      if(CMAKE_TOOLCHAIN_FILE)
        # remove this after CMAKE fix nvcc sbsa
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
      else()
        message(FATAL_ERROR "CUDA compiler not found in PATH")
      endif()
    endif()

    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  else()
    # Without CUDA, JIT is only kept on x86 targets.
    if(NOT (MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386"))
      message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
      set(MGE_WITH_JIT OFF)
      set(MGE_WITH_JIT_MLIR OFF)
    endif()
    set(MGE_WITH_HALIDE OFF)
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
  endif()
  find_package(PythonInterp 3 REQUIRED)

  # NOTICE: PYTHON3_EXECUTABLE_WITHOUT_VERSION is meant for targets that do not
  # depend on the Python API.
  # PURPOSE: reuse target objects when the python3 version is switched.
  # Falls back to PYTHON_EXECUTABLE when "python3" cannot be found in PATH.
  set(PYTHON3_IN_ENV "python3")
  find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  if(NOT PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    # The backslash joins the two source lines into one message; the second
    # line stays unindented so no extra whitespace ends up in the text.
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
  else()
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    # Deliberately store the bare command name rather than the resolved path.
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
  endif()

  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads)
  # When the thread library is selected via the "-pthread" flag and CUDA is
  # enabled, forward the flag through -Xcompiler for CUDA sources and pass it
  # unchanged for every other language.
  if("${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread" AND MGE_WITH_CUDA)
    set_property(
      TARGET Threads::Threads
      PROPERTY INTERFACE_COMPILE_OPTIONS
               "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
               "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
  endif()
  # BLAS back end selection (cache entry with a fixed set of choices).
  set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
  set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)

  # Optional user override of the nvcc -gencode arguments; empty means "use the
  # defaults chosen later in this file".
  set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")

  # Default the CUDA host compiler to the C++ compiler.
  # This must be a CMake variable reference (${...}); the Make-style $(...)
  # spelling is not expanded by CMake and would store the literal text
  # "$(CMAKE_CXX_COMPILER)" instead of the compiler path.
  if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
  endif()

  # Single-configuration generators default to RelWithDebInfo.
  if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
  endif()
  # Translate the RTTI / exception switches into C++ compiler flags.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
  endif()

  if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
  endif()

  # Building the imperative runtime raises the C++ standard to 17. Note that
  # this is evaluated before the inference-only override below.
  if(MGE_BUILD_IMPERATIVE_RT)
    set(CMAKE_CXX_STANDARD 17)
  endif()

  # Distributed support needs at least one of CUDA / ROCm.
  if(NOT (MGE_WITH_CUDA OR MGE_WITH_ROCM))
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
  endif()

  # Inference-only builds drop distributed support and the Python module.
  if(MGE_INFERENCE_ONLY)
    message(STATUS "Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)
    message(STATUS "Disable imperative_rt python module for inference only build.")
    set(MGE_BUILD_IMPERATIVE_RT OFF)
  endif()

  # Pull in third-party projects according to the final feature set.
  if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    include(cmake/llvm-project.cmake)
  endif()
  if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
  endif()
  if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
  endif()
  # ---------------------------------------------------------------------------
  # CUDA build configuration: include/link directories, nvcc flags, the default
  # -gencode list and the MGE_CUDA_LIBS link list.
  # ---------------------------------------------------------------------------
  if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

    # Use every implicit CUDA host link directory except those named "stubs".
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
      get_filename_component(_NAME ${path} NAME)
      if(NOT "${_NAME}" STREQUAL "stubs")
        list(APPEND CUDA_LINK_DIRECTORIES ${path})
      endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    # Per-configuration nvcc optimisation flags.
    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")

    if(MSVC OR WIN32)
      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
      set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
      # Quoted: CMAKE_BUILD_TYPE is empty under multi-config generators, and an
      # unquoted empty expansion would make this if() a configure error.
      if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
      endif()
      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
    else()
      # NOTE(review): unlike the Windows branch this replaces any pre-existing
      # CMAKE_CUDA_FLAGS instead of appending -- confirm this is intended.
      set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    endif()

    if(NOT MGE_ENABLE_RTTI)
      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
    endif()

    # Default -gencode list, used only when the user did not set
    # MGE_CUDA_GENCODE. Each branch picks the SM architectures to build SASS
    # for and, optionally, one architecture to additionally embed PTX for.
    if(NOT MGE_CUDA_GENCODE)
      if("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386" OR "${MGE_ARCH}" STREQUAL "aarch64")
        set(MEGDNN_THREADS_512 0)
        set(mge_cuda_ptx_arch "")
        if(MGE_CUDA_USE_STATIC
           AND NOT MGE_WITH_CUDNN_SHARED
           AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
          message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
          set(mge_cuda_sm_archs 61 70 75)
        elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1.0")
          set(mge_cuda_sm_archs 61 70 75 80 86)
          set(mge_cuda_ptx_arch 86)
        elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0.0")
          set(mge_cuda_sm_archs 61 70 75 80)
          set(mge_cuda_ptx_arch 80)
        elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
          set(mge_cuda_sm_archs 52 60 61 70 75)
          set(mge_cuda_ptx_arch 75)
        elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
          set(mge_cuda_sm_archs 52 60 61 70)
          set(mge_cuda_ptx_arch 70)
        else()
          set(mge_cuda_sm_archs 35 52 60 61)
          set(mge_cuda_ptx_arch 61)
        endif()

        foreach(sm_arch IN LISTS mge_cuda_sm_archs)
          string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${sm_arch},code=sm_${sm_arch}")
        endforeach()
        if(NOT "${mge_cuda_ptx_arch}" STREQUAL "")
          string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${mge_cuda_ptx_arch},code=compute_${mge_cuda_ptx_arch}")
        endif()
      else()
        message(FATAL_ERROR "Unsupported CUDA host arch.")
      endif()
    else()
      set(MEGDNN_THREADS_512 1)
    endif()
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")

    if(MGE_WITH_TRT)
      include(cmake/tensorrt.cmake)
    endif()

    if(MGE_CUDA_USE_STATIC)
      # ---- static CUDA libraries -------------------------------------------
      if(MGE_WITH_TRT)
        if(MSVC OR WIN32)
          message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
          list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
        else()
          list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
        endif()
        if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
          message(STATUS "handle trt myelin lib after trt7")
          list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
        endif()
      endif()

      # cuDNN: Windows links the located library file directly; elsewhere
      # cuDNN 7.5.0 needs --whole-archive as a bug workaround.
      if(MSVC OR WIN32)
        message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
        list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
      elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
        message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
      else()
        list(APPEND MGE_CUDA_LIBS libcudnn)
      endif()

      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
        list(APPEND MGE_CUDA_LIBS cublas.lib)
      else()
        list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
        if(MGE_WITH_CUBLAS_SHARED)
          list(APPEND MGE_CUDA_LIBS cublas)
        else()
          list(APPEND MGE_CUDA_LIBS cublas_static)
        endif()
      endif()

      # cublasLt is linked from CUDA 10.1 on.
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.1.0")
        if(MSVC OR WIN32)
          list(APPEND MGE_CUDA_LIBS cublasLt.lib)
        elseif(MGE_WITH_CUBLAS_SHARED)
          list(APPEND MGE_CUDA_LIBS cublasLt)
        else()
          list(APPEND MGE_CUDA_LIBS cublasLt_static)
        endif()
      endif()

      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0" AND NOT MSVC AND NOT WIN32)
        # mark all symbols from liblapack_static.a as weak to avoid
        # duplicated definition with mkl
        find_library(
          LAPACK_STATIC_PATH lapack_static
          HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        if(NOT LAPACK_STATIC_PATH)
          message(FATAL_ERROR "liblapack_static.a not found")
        endif()
        set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

        # add a target that run objcopy
        add_custom_command(
          OUTPUT ${LAPACK_STATIC_COPY_PATH}
          COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
          VERBATIM)
        add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

        # create a library named "lapack_static_weak"
        add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
        add_dependencies(lapack_static_weak lapack_static_weak_target)
        set_target_properties(
          lapack_static_weak PROPERTIES
          IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
        list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
      endif()
    else()
      # ---- shared CUDA libraries -------------------------------------------
      if(MGE_WITH_TRT)
        list(APPEND MGE_CUDA_LIBS libnvinfer)
        if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
          message(STATUS "handle trt myelin lib after trt7")
          list(APPEND MGE_CUDA_LIBS libmyelin)
        endif()
      endif()
      list(APPEND MGE_CUDA_LIBS libcudnn)
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.1.0")
        list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
      endif()
      list(APPEND MGE_CUDA_LIBS cudart)
    endif()

    # Driver / NVRTC: link the real libraries unless the matching stub is used.
    if(NOT MGE_WITH_CUDA_STUB)
      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS cuda.lib)
      else()
        list(APPEND MGE_CUDA_LIBS cuda)
      endif()
    endif()
    if(NOT MGE_WITH_NVRTC_STUB)
      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS nvrtc.lib)
      else()
        list(APPEND MGE_CUDA_LIBS nvrtc)
      endif()
    endif()
    if(MGE_WITH_ANY_CUDA_STUB)
      add_subdirectory(dnn/cuda-stub)
      list(APPEND MGE_CUDA_LIBS cuda-stub)
    endif()

    # NOTE(review): on Windows this appends nvrtc.lib unconditionally (even when
    # the NVRTC stub is selected above) where other platforms add nvToolsExt --
    # confirm this is intended.
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    else()
      list(APPEND MGE_CUDA_LIBS nvToolsExt)
    endif()

    # NOTE(review): these are string appends, not list appends, so "-lrt" /
    # "-ldl" become part of the last list element. Kept as-is because consumers
    # of MGE_CUDA_LIBS may rely on the current form.
    set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -lrt")
    if(UNIX)
      set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -ldl")
    endif()
  endif()
  # Cambricon (MLU) back end: headers and libraries come from $NEUWARE_HOME, and
  # the BANG compiler flags are derived from MGE_MLU_ARCH and the build type.
  if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    # Map the MLU product name to the numeric value used for __BANG_ARCH__.
    # The expansion is quoted so that an unset MGE_MLU_ARCH reaches the
    # "Unsupported MLU arch." error instead of a malformed if() expression.
    if("${MGE_MLU_ARCH}" STREQUAL "MLU100")
      set(BANG_ARCH "100")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU1h8")
      set(BANG_ARCH "110")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU220")
      set(BANG_ARCH "220")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU270")
      set(BANG_ARCH "270")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU290")
      set(BANG_ARCH "290")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU200")
      set(BANG_ARCH "200")
    else()
      message(FATAL_ERROR "Unsupported MLU arch.")
    endif()

    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} --bang-mlu-arch=${MGE_MLU_ARCH}")
    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -std=c++11 -Werror")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__BANG_ARCH__=${BANG_ARCH}")

    # Optimisation flags per build type. Quoted because CMAKE_BUILD_TYPE is
    # empty under multi-config generators; no flags are added in that case.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O0")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
      set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
    set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
  endif()
  # ROCm back end: all configuration lives in cmake/rocm.cmake.
  if(MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
  endif()

  # Atlas back end: build the stub library, add it to the Atlas link list and
  # mirror the switch into MGB_ATLAS.
  if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
  endif()
  # Use ccache as the compiler launcher whenever the program can be found.
  find_program(CCACHE_BIN ccache)
  if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
    # The CUDA launcher is only set for CUDA builds on CMake 3.10.0 or newer.
    if(MGE_WITH_CUDA)
      if(NOT CMAKE_VERSION VERSION_LESS "3.10.0")
        message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
        set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
      endif()
    endif()
  endif()
  # BLAS backend selection for x86 targets. MGE_ARCH and MGE_BLAS are
  # referenced by name (no ${...}), matching the MGE_ARCH checks further down
  # in this file, so an empty value cannot turn the if() into a syntax error.
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
      include(cmake/mkl.cmake)
      set(MGE_BLAS_LIBS libmkl)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
      include(cmake/OpenBLAS.cmake)
      set(MGE_BLAS_LIBS libopenblas)
    else()
      message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
  endif()

  # MKL-DNN build: only enabled for x86_64.
  if(MGE_WITH_MKLDNN AND MGE_ARCH STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
  endif()
  # RTTI: mangling is on and RTTI off unless MGE_ENABLE_RTTI is set, in which
  # case the two values are swapped.
  set(MEGDNN_ENABLE_MANGLING 1)
  set(MEGDNN_ENABLE_RTTI 0)
  if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_MANGLING 0)
    set(MEGDNN_ENABLE_RTTI 1)
  endif()
  set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})

  # Logging: the logging switch also drives the JSON switch.
  set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

  # Exceptions: mirror the switch, and tell the user what disabling it means.
  set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
  set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
  if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
  endif()

  # JIT: Halide is pulled in only when both JIT and Halide are requested.
  if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
  endif()
  # Thread support.
  # On Apple hosts the thread variables are filled in by hand, and the JIT,
  # Halide and MLIR switches are forced off before they are mirrored below.
  if(APPLE)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)

    message(STATUS "disable jit, halide and mlir on macos host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # Mirror the (possibly just overridden) JIT switches into MGB_* variables.
  set(MGB_JIT ${MGE_WITH_JIT})
  set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
  set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})

  # Windows: thread variables are likewise set by hand.
  if(MSVC OR WIN32)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
  endif()

  # Threads count as available if any of these three indicators is true.
  if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT OR ANDROID)
    set(MGB_HAVE_THREAD 1)
  endif()

  # Test builds use intra-op multi threads.
  if(MGE_WITH_TEST)
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
  endif()
  # Mirror each backend switch into its MGB_* / MEGDNN_WITH_* counterpart.

  # CUDA
  set(MGB_CUDA         ${MGE_WITH_CUDA})
  set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})

  # ROCM
  set(MGB_ROCM         ${MGE_WITH_ROCM})
  set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})

  # CAMBRICON
  set(MGB_CAMBRICON         ${MGE_WITH_CAMBRICON})
  set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})

  # ENFLAME
  set(MGB_ENFLAME         ${MGE_WITH_ENFLAME})
  set(MEGDNN_WITH_ENFLAME ${MGE_WITH_ENFLAME})
  # Debug info: assertion locations and debug utilities are enabled for the
  # Debug and RelWithDebInfo build types and disabled otherwise.
  # CMAKE_BUILD_TYPE is referenced by name (no ${...}) so that an empty build
  # type takes the else() branch instead of making the if() malformed.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
  else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
  endif()
  # TensorRT
  set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

  # Inference only: gradients stay enabled and slim serving stays off unless
  # an inference-only build is requested without tests.
  set(MGB_ENABLE_GRAD 1)
  set(MGB_BUILD_SLIM_SERVING 0)
  if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
  endif()

  # Distributed communication
  set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})
  # MGE_ARCH related flags.
  # x86: record which BLAS implementation was selected.
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
      set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
      set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()
  endif()

  # Enable Naive
  if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
  endif()

  # x86: architecture defines first, then (for non-MSVC compilers only) the
  # word-size flag followed by the SSE flags.
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
      set(MEGDNN_X86_64 1)
      set(MEGDNN_64_BIT 1)
    else()
      set(MEGDNN_X86_32 1)
    endif()

    if(NOT MSVC)
      if(MGE_ARCH STREQUAL "x86_64")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
      else()
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
      endif()
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse")
    endif()
  endif()
  # The dotprod feature is not enabled by default on Apple hosts (cpuinfo has
  # problems there). Elsewhere, probe the compiler for the armv8.2-a+dotprod
  # flag and enable MGB_ENABLE_DOT when it is accepted.
  if(NOT APPLE)
    check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
    if(CXX_COMPILER_SUPPORT_DOT)
      message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
      set(MGB_ENABLE_DOT 1)
    endif()
  endif()
  # armv7 target settings.
  if(MGE_ARCH STREQUAL "armv7")
    set(MEGDNN_ARMV7 1)
    set(MARCH "-march=armv7-a")

    if(ANDROID)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is not
    # fully IEEE 754 compatible, so GCC does not turn it on by default.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsafe-math-optimizations")
  endif()
  # aarch64 target settings.
  if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)

    # Optional armv8.2 fp16 support replaces the baseline -march value.
    if(MGE_ARMV8_2_FEATURE_FP16)
      message(STATUS "Enable fp16 feature support in armv8.2")
      # MGE_DISABLE_FLOAT16 is tested by name: expanding it with ${...} would
      # leave a bare if(NOT) whenever the variable is empty or unset.
      if(NOT MGE_DISABLE_FLOAT16)
        set(MEGDNN_ENABLE_FP16_NEON 1)
      endif()
      set(MARCH "-march=armv8.2-a+fp16")
    endif()

    if(MGE_WITH_CUDA)
      # The continuation lines deliberately start at column 0: any leading
      # whitespace would become part of the message text.
      message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
for save link time(14min->1min), you may open below flags if not deploy on\
arm a53 platform, or just build release type!")
      # Opt-in referred to by the warning above; left disabled on purpose.
      #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
  endif()
  # riscv64 target settings.
  if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_RISCV64 1)
    set(MEGDNN_64_BIT 1)
  endif()

  # Append whichever -march value the architecture sections chose (may be empty).
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")

  set(MGE_VERSION_SCRIPT
    ${PROJECT_SOURCE_DIR}/src/version.ld
    CACHE INTERNAL "Path to linker version script")

  # Write out megbrain_build_config.h, which defines the macros needed by both
  # megbrain and dnn, and install it with the other headers.
  configure_file(
    src/megbrain_build_config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

  add_subdirectory(dnn)
  # Generate megbrain/opr/param_defs.h from the Python parameter definitions
  # and expose it through the mgb_opr_param_defs INTERFACE library.
  list(APPEND MGB_OPR_PARAM_DEFS_SRCS
    ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_SCRIPT
    ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_OUT_DIR
    ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
  file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

  # Build rule: rerun the generator whenever its inputs or the script change.
  add_custom_command(
    OUTPUT
      ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND
      ${PYTHON3_EXECUTABLE_WITHOUT_VERSION}
      ${MGB_OPR_PARAM_DEFS_SCRIPT}
      ${MGB_OPR_PARAM_DEFS_SRCS}
      ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS
      ${MGB_OPR_PARAM_DEFS_SRCS}
      ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
  )
  list(APPEND MGB_OPR_PARAM_DEFS_OUTS
    ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)
  install(
    FILES ${MGB_OPR_PARAM_DEFS_OUTS}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
  list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})

  # The custom target drives the generation; the INTERFACE library carries
  # the include directories and depends on that target.
  add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
  add_library(mgb_opr_param_defs INTERFACE)
  target_include_directories(mgb_opr_param_defs
    INTERFACE
      $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
      $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
  )
  add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
  install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
  # Generate param_defs.td for the MLIR JIT and the imperative runtime.
  if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

    # Build the generator input at configure time: a copy of dnn's
    # opr_param_defs.py with mgb_opr_param_defs.py appended to it.
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    # CONTENTS must be quoted: unquoted, every ';' in the Python source would
    # split the text into list elements that file(APPEND) joins back together
    # without the semicolons.
    file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})

    add_custom_command(
      OUTPUT ${OPR_PARAM_DEFS_OUT}
      COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
      DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py ${OPR_PARAM_DEFS_SCRIPT}
      VERBATIM
    )

    # mlir tblgen sources: every include directory is turned into a -I flag.
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
  endif()
  # Distributed builds add MegRay; its NCCL/RCCL options are forced to follow
  # the CUDA/ROCm switches of this build.
  if(MGE_WITH_DISTRIBUTED)
    set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM} CACHE BOOL "Override MegRay option" FORCE)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
  endif()

  add_subdirectory(src)

  # Optional components, each behind its own switch.
  if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
  endif()

  if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
  endif()

  # Tests are only added when RTTI is enabled as well.
  if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
    add_subdirectory(test)
  endif()
  # "develop" target: when the _imperative_rt target exists, create symlinks
  # in the source tree's Python package that point at the built module and at
  # version.py in the build tree.
  if(TARGET _imperative_rt)
    add_custom_target(develop
      COMMAND
        ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
      COMMAND
        ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
      DEPENDS _imperative_rt
      VERBATIM
    )
  endif()
  # Configure and install the pkg-config file.
  # Unlike the Config.cmake modules, it is neither relocatable nor really
  # portable, because two dependencies ship without pkg-config descriptions:
  # FlatBuffers and MKL-DNN.
  if(MGE_USE_SYSTEM_MKLDNN)
    set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
  endif()
  if(MGE_USE_SYSTEM_OPENBLAS)
    string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
  endif()

  configure_file(
    cmake/megengine.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    @ONLY)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
  # CMake package files. Targets are not exported when MGE_WITH_DISTRIBUTED is
  # on, because MegRay is not ready for that.
  if(NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)

    # Generate the package config file and its version companion.
    configure_package_config_file(
      cmake/MegEngineConfig.cmake.in
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
      INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
    write_basic_package_version_file(
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
      COMPATIBILITY SameMajorVersion
    )

    # Install the exported targets next to the two generated files.
    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(
      FILES
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
  endif()
  # Windows: use the /MT family of runtime flags.
  if(MSVC OR WIN32)
    add_compile_options(
      $<$<CONFIG:>:/MT>
      $<$<CONFIG:Debug>:/MTd>
      $<$<CONFIG:Release>:/MT>
    )

    # Rewrite any /MD found in the C and C++ flag variables to /MT and force
    # the result back into the cache.
    foreach(CompilerFlag
        CMAKE_C_FLAGS
        CMAKE_C_FLAGS_DEBUG
        CMAKE_C_FLAGS_RELEASE
        CMAKE_C_FLAGS_MINSIZEREL
        CMAKE_C_FLAGS_RELWITHDEBINFO
        CMAKE_CXX_FLAGS
        CMAKE_CXX_FLAGS_DEBUG
        CMAKE_CXX_FLAGS_RELEASE
        CMAKE_CXX_FLAGS_MINSIZEREL
        CMAKE_CXX_FLAGS_RELWITHDEBINFO)
      # ${CompilerFlag} expands to a variable name, which if() dereferences.
      if(${CompilerFlag} MATCHES "/MD")
        string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
        set(${CompilerFlag} "${${CompilerFlag}}" CACHE STRING "msvc compiler flags" FORCE)
        message(VERBOSE "MSVC flags: ${CompilerFlag}:${${CompilerFlag}}")
      endif()
    endforeach()
  endif()
  # MLIR helper tools are built only for the MLIR JIT.
  if(MGE_WITH_JIT_MLIR)
    add_subdirectory(tools/mlir/mgb-opt)
    add_subdirectory(tools/mlir/mgb-file-check)
  endif()

  # Warn when a static CUDA build uses cuDNN 8.0.0 or newer without the shared
  # cuDNN option. The warning is emitted three times, as in the original.
  if(MGE_WITH_CUDA
      AND MGE_CUDA_USE_STATIC
      AND (NOT "${CUDNN_VERSION}" VERSION_LESS "8.0.0")
      AND (NOT MGE_WITH_CUDNN_SHARED))
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
  endif()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台。