You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

CMakeLists.txt 40 kB

5 years ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981
  # ---------------------------------------------------------------------------
  # Project bootstrap: CMake version floor, project declaration and the
  # language/toolchain defaults shared by the statements that follow.
  # ---------------------------------------------------------------------------
  cmake_minimum_required(VERSION 3.15.2)

  # Included before project() because project() consumes MGB_VER_STRING
  # (presumably defined by this module -- confirm in FetchMegBrainVersion.cmake).
  include(cmake/FetchMegBrainVersion.cmake)
  project(MegEngine
    LANGUAGES C CXX
    VERSION ${MGB_VER_STRING})

  # C++14, strictly required, without compiler extensions.
  set(CMAKE_CXX_STANDARD 14)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)

  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
  set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
  set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)

  # Custom static-archive rules for every platform except MSVC/Windows/Apple.
  # The `D` modifier (and `-D` for ranlib) selects deterministic mode in GNU
  # binutils.
  if(NOT (MSVC OR APPLE OR WIN32))
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
  endif()

  include(GNUInstallDirs)
  include(CheckCXXCompilerFlag)
  check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

  # Target architecture; AUTO is resolved from CMAKE_SYSTEM_PROCESSOR later on.
  set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
  set_property(CACHE MGE_ARCH PROPERTY STRINGS
    AUTO x86_64 i386 armv7 aarch64 naive fallback)
  # Name of the export set used for installed targets.
  set(MGE_EXPORT_TARGETS MegEngine-targets)

  # ---------------------------------------------------------------------------
  # User-facing build switches, grouped by theme.  option() calls are
  # independent of one another, so the grouping is purely for readability.
  # ---------------------------------------------------------------------------

  # --- JIT back ends ---
  option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
  option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
  option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)

  # --- Accelerator back ends ---
  option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
  option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
  option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
  option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
  option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
  option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." OFF)
  option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
  option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
  option(MGE_WITH_ROCM "Enable ROCM support" OFF)

  # --- CPU features and numeric types ---
  option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
  option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF)
  option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
  option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)

  # --- Language/runtime features ---
  option(MGE_ENABLE_RTTI "Build with RTTI" ON)
  option(MGE_ENABLE_LOGGING "Build with logging" ON)
  option(MGE_DEBUG_UTIL "Enable debug utility" ON)
  option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)

  # --- Binary size reduction / profiling ---
  option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
  option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)
  option(MGE_INFERENCE_ONLY "Build inference only library." OFF)

  # --- Linking and packaging ---
  option(BUILD_SHARED_LIBS "Build shared libraries" ON)
  option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
  option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
  option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)

  # --- Optional components ---
  option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
  option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
  option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
  option(MGE_BUILD_SDK "Build load_and_run" ON)
  # MGE_WITH_ANY_CUDA_STUB is ON as soon as either the CUDA driver stub or the
  # NVRTC stub is requested; it gates the dnn/cuda-stub subdirectory later on.
  set(MGE_WITH_ANY_CUDA_STUB OFF)
  if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
  endif()
  # Binary-size reduction ("BIN REDUCE").
  #
  # MGE_BIN_REDUCE, when non-empty, names a header that is force-included into
  # every C and C++ translation unit; full LTO is requested at link time.
  # With MGE_WITH_MINIMUM_SIZE, RTTI, logging and exceptions are switched off
  # as well and the build becomes inference-only.
  #
  # The expansion is quoted so that an unset or empty MGE_BIN_REDUCE is compared
  # as an empty string instead of disappearing from the if() argument list.
  if(NOT "${MGE_BIN_REDUCE}" STREQUAL "")
    message(STATUS "build with BIN REDUCE")

    if(MGE_WITH_MINIMUM_SIZE)
      set(MGE_ENABLE_RTTI OFF)
      set(MGE_ENABLE_LOGGING OFF)
      set(MGE_ENABLE_EXCEPTIONS OFF)
      set(MGE_INFERENCE_ONLY ON)
    endif()

    string(APPEND CMAKE_CXX_FLAGS " -include ${MGE_BIN_REDUCE}")
    string(APPEND CMAKE_C_FLAGS " -include ${MGE_BIN_REDUCE}")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
  endif()
  # Midout profiling: define MIDOUT_PROFILING for both C++ and C sources.
  if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE")
    string(APPEND CMAKE_CXX_FLAGS " -DMIDOUT_PROFILING")
    string(APPEND CMAKE_C_FLAGS " -DMIDOUT_PROFILING")
  endif()

  # Apple builds are always static (needed for the Xcode framework, per the
  # status message below).
  if(APPLE)
    set(BUILD_SHARED_LIBS OFF)
    message(STATUS "build static for xcode framework require")
  endif()

  # Using system libraries rules out static CUDA linking.
  if(MGE_USE_SYSTEM_LIB)
    set(MGE_CUDA_USE_STATIC OFF)
  endif()

  # FlatBuffers support turns on FBS serialization.
  if(MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
  endif()
  # Cross compilation: a toolchain file is taken as the signal that we are cross
  # compiling.  flatc must then come from a host build, and MGE_ARCH is derived
  # from whichever toolchain family is recognised:
  #   * Android : ANDROID_ARCH (overridden by ANDROID_ARCH_NAME when set)
  #   * iOS     : IOS_ARCH
  #   * RISC-V  : fixed "riscv64"
  #   * generic : ARM_CROSS_BUILD_ARCH
  #
  # All variable expansions inside if() are quoted: with an empty variable the
  # unquoted form produced an if() parse error instead of reaching the intended
  # FATAL_ERROR diagnostics below.
  if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")

    if(ANDROID_TOOLCHAIN_ROOT)
      if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
        set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
      endif()
      if("${ANDROID_ARCH}" STREQUAL "arm")
        set(MGE_ARCH "armv7")
      elseif("${ANDROID_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
      endif()
    elseif(IOS_TOOLCHAIN_ROOT)
      # armv7 / armv7k / armv7s -> armv7 ; arm64 / arm64e -> aarch64
      if("${IOS_ARCH}" MATCHES "^armv7[ks]?$")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" MATCHES "^arm64e?$")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "Unsupported IOS_ARCH.")
      endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
      set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
      set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
      message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()

    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
  endif()
  # Resolve MGE_ARCH=AUTO from the processor CMake reports for the target.
  # Expansions are quoted so an empty CMAKE_SYSTEM_PROCESSOR (or MGE_ARCH) ends
  # in the explicit FATAL_ERROR below rather than an if() parse error.
  if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|AMD64)$")
      set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i386|i686)$")
      set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(aarch64|arm64)$")
      set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
      # Any other processor name starting with "arm" is treated as 32-bit ARM.
      set(MGE_ARCH "armv7")
    else()
      message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
  endif()
  # cpuinfo runtime CPU detection: offered on x86 and ARM targets, never on
  # Apple.  The option only exists (and defaults to ON) on those targets.
  # MGE_ARCH is quoted so an empty or odd value cannot break if() parsing.
  if("${MGE_ARCH}" MATCHES "^(x86_64|i386|armv7|aarch64)$" AND NOT APPLE)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
      message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
      add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
      include(cmake/cpuinfo.cmake)
    endif()
  endif()
  # Host-compiler flags.
  #
  # Windows branch : static MSVC runtime, clang-cl specific warning/define
  #                  flags, and JIT/Halide/MLIR/distributed forced off.
  # Other platforms: -Wall -Wextra plus per-configuration optimisation levels
  #                  (-Ofast on Android, -O3 elsewhere).
  if(MSVC OR WIN32)
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)

    # CMAKE_BUILD_TYPE may still be empty here (its default is assigned further
    # down the file), so the expansion has to be quoted.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
    message(STATUS "into windows build...")
    message(VERBOSE "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")

    # Reject anything that is not clang/clang-cl.  The previous condition fired
    # only when the ID *was* "Clang-cl", which contradicted the message; CMake
    # reports clang-cl with the compiler ID "Clang".
    if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang"
       AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang-cl")
      message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()

    # add flags for enable sse instruction optimize for X86, enable avx header to compile avx code
    set(WIN_FLAGS "-msse4.2 -O2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
    # If your CPU is from the Cascade Lake series, these can be enabled for performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # for windows build: downgrade selected clang diagnostics from errors, then
    # add the MSVC-style defines and warning suppressions.
    string(APPEND WIN_FLAGS " -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    string(APPEND WIN_FLAGS " -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    string(APPEND WIN_FLAGS " -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    string(APPEND WIN_FLAGS " -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break")
    string(APPEND WIN_FLAGS " /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819")
    string(APPEND WIN_FLAGS " /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    string(APPEND WIN_FLAGS " /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    string(APPEND CMAKE_C_FLAGS " ${WIN_FLAGS}")
    string(APPEND CMAKE_CXX_FLAGS " ${WIN_FLAGS}")

    #FIXME: fix halide JIT on windows
    message(STATUS "disable jit, halide and mlir on windows host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)

    #FIXME: fix MegRay on windows
    message(STATUS "Disable distributed build on windows host build...")
    set(MGE_WITH_DISTRIBUTED OFF)
  else()
    string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    if(ANDROID)
      set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG")
      set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -DNDEBUG -g")
    else()
      set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
      set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -DNDEBUG -g")
    endif()
  endif()
  # Locate cuDNN.  Statically linking cuDNN >= 8.0.0 (and not opting for the
  # shared cuDNN) switches on the large-archive link mode.
  if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
       AND NOT MGE_WITH_CUDNN_SHARED)
      message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
      set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
  endif()
  # Pick the common linker flag: -mcmodel=large for large archives, otherwise
  # the gold linker when the compiler accepts it and the platform is neither
  # Android, Apple nor Windows.  (The elseif is only reached when
  # MGE_WITH_LARGE_ARCHIVE is false.)
  check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
  if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
  elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
  endif()

  # Apply the chosen flag to shared-library, module and executable links.
  foreach(_mge_link_kind SHARED MODULE EXE)
    set(CMAKE_${_mge_link_kind}_LINKER_FLAGS
        "${CMAKE_${_mge_link_kind}_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
  endforeach()
  # Halide and MLIR are JIT back ends: without MGE_WITH_JIT they are switched
  # off with a warning.
  if(NOT MGE_WITH_JIT AND MGE_WITH_HALIDE)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(NOT MGE_WITH_JIT AND MGE_WITH_JIT_MLIR)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # FIXME: the LLVM that Halide depends on currently conflicts with the LLVM
  # that MLIR depends on; to be fixed in a later version.  Until then Halide
  # cannot be combined with the imperative runtime or with the MLIR JIT.
  if(MGE_WITH_HALIDE AND MGE_BUILD_IMPERATIVE_RT)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
  endif()
  if(MGE_WITH_HALIDE AND MGE_WITH_JIT_MLIR)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
  endif()
  # Enable the CUDA language (C++14, required).
  if(MGE_WITH_CUDA)
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    # Workaround: when a toolchain file is in use, pin the CUDA host compiler
    # to the C++ compiler before probing.
    if(CMAKE_TOOLCHAIN_FILE)
      message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
      set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    endif()

    include(CheckLanguage)
    check_language(CUDA)

    if(NOT CMAKE_CUDA_COMPILER)
      if(CMAKE_TOOLCHAIN_FILE)
        # remove this after CMAKE fix nvcc sbsa
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
      else()
        message(FATAL_ERROR "CUDA compiler not found in PATH")
      endif()
    endif()

    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  endif()
  # Without CUDA: Halide and TensorRT are always disabled, and JIT (including
  # MLIR) is kept only on x86 targets.
  if(NOT MGE_WITH_CUDA)
    if(NOT (MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386"))
      message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
      set(MGE_WITH_JIT OFF)
      set(MGE_WITH_JIT_MLIR OFF)
    endif()

    set(MGE_WITH_HALIDE OFF)

    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
  endif()
  # A Python 3 interpreter is mandatory for the build.
  find_package(PythonInterp 3 REQUIRED)

  # Threading library, preferring the -pthread compiler flag.
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads)

  # When the thread "library" is the bare -pthread flag and CUDA is enabled,
  # replace the compile options on Threads::Threads so that CUDA sources get
  # the flag via -Xcompiler while all other languages receive it directly.
  if("${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread" AND MGE_WITH_CUDA)
    set_property(
      TARGET Threads::Threads
      PROPERTY INTERFACE_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
  endif()
  # BLAS back end selection (MKL or OpenBLAS).
  set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
  set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)

  # Optional user override for the nvcc -gencode list; empty means "choose from
  # the CUDA version" (done in the CUDA section).
  set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")

  # Default the CUDA host compiler to the C++ compiler.
  # This previously used make-style "$(CMAKE_CXX_COMPILER)", which CMake does
  # not expand: the variable ended up holding that literal text.
  if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
  endif()

  # Single-config generators with no build type requested get RelWithDebInfo.
  if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
  endif()
  # Translate the RTTI / exception switches into C++ compiler flags.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
  endif()

  # GoogleTest is only pulled in for test builds.
  if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
  endif()

  # Building the imperative runtime raises the C++ standard to 17.
  if(MGE_BUILD_IMPERATIVE_RT)
    set(CMAKE_CXX_STANDARD 17)
  endif()
  # Distributed support needs CUDA and is dropped for inference-only builds;
  # inference-only builds also drop the imperative Python runtime.
  if(NOT MGE_WITH_CUDA)
    message(STATUS "Disable distributed support, as CUDA is not enabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
  endif()

  if(MGE_INFERENCE_ONLY)
    message(STATUS "Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)

    message(STATUS "Disable imperative_rt python module for inference only build.")
    set(MGE_BUILD_IMPERATIVE_RT OFF)
  endif()

  # Third-party projects, pulled in according to the final switch values.
  if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    include(cmake/llvm-project.cmake)
  endif()

  if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
  endif()

  if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
  endif()
  316. if(MGE_WITH_CUDA)
  # CUDA include/link directories and nvcc flags.
  include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

  # Use every implicit CUDA link directory except those named "stubs".
  # _NAME is quoted in the comparison: if a path yields an empty file name
  # (e.g. it ends in a slash), the unquoted form leaves if() with a missing
  # operand and the test is mis-parsed.
  foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
    get_filename_component(_NAME "${path}" NAME)
    if(NOT "${_NAME}" STREQUAL "stubs")
      list(APPEND CUDA_LINK_DIRECTORIES "${path}")
    endif()
  endforeach()
  link_directories(${CUDA_LINK_DIRECTORIES})

  # Per-configuration nvcc optimisation levels.
  set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
  set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
  set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")

  if(MSVC OR WIN32)
    string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
    # Host-compiler options forwarded through nvcc --compiler-options.
    string(APPEND CCBIN_FLAG " /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068")
    # CMAKE_BUILD_TYPE is empty with multi-config generators; quote it so the
    # comparison stays well-formed.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      string(APPEND CCBIN_FLAG " -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
    endif()
    string(APPEND CMAKE_CUDA_FLAGS " --compiler-options \" ${CCBIN_FLAG} \" ")
  else()
    # NOTE(review): this branch replaces any pre-existing CMAKE_CUDA_FLAGS,
    # whereas the Windows branch appends -- confirm that this is intended.
    set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
  endif()

  # Mirror the RTTI / exception switches for the CUDA host compiler.
  if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-exceptions")
  endif()
  # Choose nvcc -gencode flags unless the user supplied MGE_CUDA_GENCODE.
  #
  # Each case lists the SM architectures that get native code (code=sm_XX)
  # and, optionally, one architecture that additionally gets PTX
  # (code=compute_XX):
  #
  #   static cuDNN >= 8   : 61 70 75            (no PTX)
  #   CUDA >= 11.1        : 61 70 75 80 86      + PTX 86
  #   CUDA >= 11.0        : 61 70 75 80         + PTX 80
  #   CUDA >= 10.0        : 52 60 61 70 75      + PTX 75
  #   CUDA >=  9.0        : 52 60 61 70         + PTX 70
  #   older               : 35 52 60 61         + PTX 61
  #
  # MEGDNN_THREADS_512 is 0 for these defaults and 1 when the user overrides
  # the gencode list.
  #
  # Variable expansions in if() are quoted so that an empty value cannot turn
  # a comparison into a parse error.
  if(NOT MGE_CUDA_GENCODE)
    if("${MGE_ARCH}" MATCHES "^(x86_64|i386|aarch64)$")
      set(MEGDNN_THREADS_512 0)

      set(_mge_ptx_arch "")
      if(MGE_WITH_CUDA
         AND MGE_CUDA_USE_STATIC
         AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
         AND NOT MGE_WITH_CUDNN_SHARED)
        message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
        set(_mge_sm_list 61 70 75)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1.0")
        set(_mge_sm_list 61 70 75 80 86)
        set(_mge_ptx_arch 86)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0.0")
        set(_mge_sm_list 61 70 75 80)
        set(_mge_ptx_arch 80)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
        set(_mge_sm_list 52 60 61 70 75)
        set(_mge_ptx_arch 75)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
        set(_mge_sm_list 52 60 61 70)
        set(_mge_ptx_arch 70)
      else()
        set(_mge_sm_list 35 52 60 61)
        set(_mge_ptx_arch 61)
      endif()

      # Native code for every listed architecture, in order ...
      foreach(_mge_sm IN LISTS _mge_sm_list)
        string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_sm},code=sm_${_mge_sm}")
      endforeach()
      # ... followed by PTX for the designated architecture, if any.
      if(NOT "${_mge_ptx_arch}" STREQUAL "")
        string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_ptx_arch},code=compute_${_mge_ptx_arch}")
      endif()
    else()
      message(FATAL_ERROR "Unsupported CUDA host arch.")
    endif()
  else()
    set(MEGDNN_THREADS_512 1)
  endif()

  string(APPEND CMAKE_CUDA_FLAGS " ${MGE_CUDA_GENCODE}")
  # TensorRT discovery.
  if(MGE_WITH_TRT)
    include(cmake/tensorrt.cmake)
  endif()

  # Assemble MGE_CUDA_LIBS, the list of CUDA-side libraries to link.
  # Two flavours: static CUDA libraries (MGE_CUDA_USE_STATIC) or shared ones.
  if(MGE_CUDA_USE_STATIC)
    # --- TensorRT ---------------------------------------------------------
    if(MGE_WITH_TRT)
      if(MSVC OR WIN32)
        message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
        list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
      else()
        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
      endif()
      # TensorRT 7 and later additionally need the myelin libraries.
      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS
             libmyelin_compiler
             libmyelin_executor
             libmyelin_pattern_runtime
             libmyelin_pattern_library)
      endif()
    endif()

    # --- cuDNN ------------------------------------------------------------
    # Windows always links ${CUDNN_LIBRARY}; elsewhere cuDNN 7.5.0 needs
    # --whole-archive, every other version links plainly.
    if(MSVC OR WIN32)
      message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
      list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
    elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
      message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
    else()
      list(APPEND MGE_CUDA_LIBS libcudnn)
    endif()

    # --- core CUDA math/runtime libraries -----------------------------------
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS cusolver.lib cublas.lib curand.lib cudart_static.lib cusparse.lib)
    else()
      list(APPEND MGE_CUDA_LIBS cusolver_static cublas_static curand_static culibos cudart_static cusparse_static)
    endif()

    # --- cublasLt, linked from CUDA 10.1 onwards ----------------------------
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS cublasLt.lib)
      else()
        list(APPEND MGE_CUDA_LIBS cublasLt_static)
      endif()
    endif()

    # --- liblapack_static, CUDA >= 10.0 and not on Windows ------------------
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0" AND NOT (MSVC OR WIN32))
      # mark all symbols from liblapack_static.a as weak to avoid
      # duplicated definition with mkl
      find_library(LAPACK_STATIC_PATH lapack_static
                   HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
      if(NOT LAPACK_STATIC_PATH)
        message(FATAL_ERROR "liblapack_static.a not found")
      endif()
      set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

      # Build rule that runs objcopy to produce the weakened copy.
      add_custom_command(
        OUTPUT ${LAPACK_STATIC_COPY_PATH}
        COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
        VERBATIM)
      add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

      # Imported library "lapack_static_weak" pointing at the copy.
      add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
      add_dependencies(lapack_static_weak lapack_static_weak_target)
      set_target_properties(lapack_static_weak
                            PROPERTIES IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})

      list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
    endif()
  else()
    # Shared CUDA libraries.
    if(MGE_WITH_TRT)
      list(APPEND MGE_CUDA_LIBS libnvinfer)
      if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin)
      endif()
    endif()

    list(APPEND MGE_CUDA_LIBS libcudnn)
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
      list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
    endif()
    list(APPEND MGE_CUDA_LIBS cudart)
  endif()
  # Driver API / NVRTC: link the real library unless the matching stub was
  # requested.
  if(NOT MGE_WITH_CUDA_STUB)
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS cuda.lib)
    else()
      list(APPEND MGE_CUDA_LIBS cuda)
    endif()
  endif()

  if(NOT MGE_WITH_NVRTC_STUB)
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    else()
      list(APPEND MGE_CUDA_LIBS nvrtc)
    endif()
  endif()

  # Build and link the stub library when either stub is in use.
  if(MGE_WITH_ANY_CUDA_STUB)
    add_subdirectory(dnn/cuda-stub)
    list(APPEND MGE_CUDA_LIBS cuda-stub)
  endif()

  # NOTE(review): the Windows branch links nvrtc.lib unconditionally, even when
  # MGE_WITH_NVRTC_STUB is ON -- confirm whether that is intended.
  if(MSVC OR WIN32)
    list(APPEND MGE_CUDA_LIBS nvrtc.lib)
  else()
    list(APPEND MGE_CUDA_LIBS nvToolsExt)
  endif()

  # System libraries.  MGE_CUDA_LIBS is a list, so the flags are appended as
  # list elements; the former string concatenation glued " -lrt" / " -ldl"
  # onto the last element (e.g. "nvToolsExt -lrt -ldl").
  list(APPEND MGE_CUDA_LIBS -lrt)
  if(UNIX)
    list(APPEND MGE_CUDA_LIBS -ldl)
  endif()
  502. endif()
  # Cambricon (MLU) back end.
  #
  # Reads NEUWARE_HOME from the environment at configure time, maps
  # MGE_MLU_ARCH to the numeric BANG_ARCH, builds the cncc flag string and
  # pulls in the cnrt/cndev/cnml modules.
  #
  # MGE_MLU_ARCH and CMAKE_BUILD_TYPE are quoted inside if(): when either is
  # empty the unquoted form is an if() parse error rather than the
  # "Unsupported MLU arch." diagnostic (or a silently skipped branch).
  if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    if("${MGE_MLU_ARCH}" STREQUAL "MLU100")
      set(BANG_ARCH "100")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU1h8")
      set(BANG_ARCH "110")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU220")
      set(BANG_ARCH "220")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU270")
      set(BANG_ARCH "270")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU290")
      set(BANG_ARCH "290")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU200")
      set(BANG_ARCH "200")
    else()
      message(FATAL_ERROR "Unsupported MLU arch.")
    endif()

    string(APPEND BANG_CNCC_FLAGS " --bang-mlu-arch=${MGE_MLU_ARCH}")
    string(APPEND BANG_CNCC_FLAGS " -std=c++11 -Werror")
    string(APPEND CMAKE_CXX_FLAGS " -D__BANG_ARCH__=${BANG_ARCH}")

    # Optimisation/debug flags for cncc per build type; other build types add
    # nothing.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      string(APPEND BANG_CNCC_FLAGS " -g -O0")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
      string(APPEND BANG_CNCC_FLAGS " -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
      string(APPEND BANG_CNCC_FLAGS " -g -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
      string(APPEND BANG_CNCC_FLAGS " -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
  endif()
  # ROCm back end.
  if(MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
  endif()

  # Atlas back end: build the stub library, link it, and mirror the switch
  # into MGB_ATLAS.
  if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
  endif()
  # Use ccache as the compiler launcher when it is installed: always for C++,
  # and for CUDA too when CUDA is enabled and CMake is at least 3.10.0.
  find_program(CCACHE_BIN ccache)
  if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
    if(MGE_WITH_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10.0")
      message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
      set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
    endif()
  endif()
  # BLAS back end, x86 targets only: MKL or OpenBLAS as chosen by MGE_BLAS.
  # Expansions are quoted so an empty MGE_BLAS reaches the "Unknown BLAS"
  # diagnostic instead of breaking if() parsing.
  if("${MGE_ARCH}" MATCHES "^(x86_64|i386)$")
    if("${MGE_BLAS}" STREQUAL "MKL")
      include(cmake/mkl.cmake)
      set(MGE_BLAS_LIBS libmkl)
    elseif("${MGE_BLAS}" STREQUAL "OpenBLAS")
      include(cmake/OpenBLAS.cmake)
      set(MGE_BLAS_LIBS libopenblas)
    else()
      message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
  endif()

  # MKLDNN build: only on x86_64.
  if(MGE_WITH_MKLDNN AND "${MGE_ARCH}" STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
  endif()
  # RTTI: MEGDNN_ENABLE_RTTI follows MGE_ENABLE_RTTI and MEGDNN_ENABLE_MANGLING
  # is its inverse.
  set(MEGDNN_ENABLE_MANGLING 1)
  set(MEGDNN_ENABLE_RTTI 0)
  if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_MANGLING 0)
    set(MEGDNN_ENABLE_RTTI 1)
  endif()
  set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})

  # Logging: one switch drives MegBrain logging, MegDNN logging and JSON.
  set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

  # Exceptions
  set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
  set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
  if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
  endif()
  # Thread setup and host restrictions for macOS.
  # This branch must run BEFORE the Halide include below: it switches JIT,
  # Halide and MLIR off, and the Halide include is guarded by those options.
  # Previously Halide was included first, so it was still pulled in on macOS
  # even though the build reports it as disabled.
  if(APPLE)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    message(STATUS "disable jit, halide and mlir on macos host build...")
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
  endif()

  # JIT: Halide is only needed when both JIT and the Halide backend are enabled.
  if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
  endif()

  # Mirror the (possibly overridden) JIT options into the MGB_* variables.
  set(MGB_JIT ${MGE_WITH_JIT})
  set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
  set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
  # Windows: preset the thread-related variables by hand.
  if(WIN32 OR MSVC)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
  endif()

  # Thread support is reported when either a thread library flag or the
  # Win32 thread marker has been set.
  if(CMAKE_USE_WIN32_THREADS_INIT OR CMAKE_THREAD_LIBS_INIT)
    set(MGB_HAVE_THREAD 1)
  endif()

  # Tests use intra-op multi threads.
  if(MGE_WITH_TEST)
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
  endif()
  # Forward the backend options to the MegBrain (MGB_*) and MegDNN
  # (MEGDNN_WITH_*) variables.
  # CUDA
  set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
  set(MGB_CUDA ${MGE_WITH_CUDA})
  # ROCM
  set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
  set(MGB_ROCM ${MGE_WITH_ROCM})
  # CAMBRICON
  set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
  set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
  # Debug info: assertion locations and debug utilities are enabled for the
  # Debug and RelWithDebInfo build types only.
  # CMAKE_BUILD_TYPE is referenced by name rather than as an unquoted
  # ${CMAKE_BUILD_TYPE}: with an empty build type the expansion leaves if()
  # without a left-hand operand, which is a configure-time error.
  if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
  else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
  endif()
  # TensorRT
  set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

  # Inference only: gradients are dropped and slim serving is built only when
  # inference-only mode is requested and tests are not being built.
  if(MGE_WITH_TEST OR NOT MGE_INFERENCE_ONLY)
    set(MGB_ENABLE_GRAD 1)
    set(MGB_BUILD_SLIM_SERVING 0)
  else()
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
  endif()

  # Distributed communication
  set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})
  # MGE_ARCH related flags

  # Enable Naive
  if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
  endif()

  # x86 (32 and 64 bit)
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)

    # Record which BLAS implementation backs the x86 kernels.
    if(MGE_BLAS STREQUAL "OpenBLAS")
      set(MEGDNN_X86_WITH_OPENBLAS 1)
    elseif(MGE_BLAS STREQUAL "MKL")
      set(MEGDNN_X86_WITH_MKL 1)
    endif()

    # Word-size markers.
    if(MGE_ARCH STREQUAL "x86_64")
      set(MEGDNN_X86_64 1)
      set(MEGDNN_64_BIT 1)
    else()
      set(MEGDNN_X86_32 1)
    endif()

    # The -m* options are only added for non-MSVC compilers.
    if(NOT MSVC)
      if(MGE_ARCH STREQUAL "x86_64")
        string(APPEND CMAKE_CXX_FLAGS " -m64")
      else()
        string(APPEND CMAKE_CXX_FLAGS " -m32")
      endif()
      string(APPEND CMAKE_CXX_FLAGS " -msse4.2 -mfpmath=sse")
    endif()
  endif()
  # armv7
  if(MGE_ARCH STREQUAL "armv7")
    set(MEGDNN_ARMV7 1)
    set(MARCH "-march=armv7-a")
    if(ANDROID)
      string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is not
    # fully IEEE 754 compatible, so GCC does not turn it on by default.
    string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
  endif()
  # aarch64: set the 64-bit markers and pick -march from the requested
  # armv8.2 features (fp16 and/or dotprod).
  if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)
    if(MGE_ARMV8_2_FEATURE_FP16)
      message(STATUS "Enable fp16 feature support in armv8.2")
      # Test the variable by name: an unquoted ${MGE_DISABLE_FLOAT16} collapses
      # to a malformed `if(NOT )` when the option is unset or empty.
      if(NOT MGE_DISABLE_FLOAT16)
        set(MEGDNN_ENABLE_FP16_NEON 1)
      endif()
      set(MARCH "-march=armv8.2-a+fp16")
    endif()
    if(MGE_ARMV8_2_FEATURE_DOTPROD)
      message(STATUS "Enable dotprod feature support in armv8.2")
      if(MGE_ARMV8_2_FEATURE_FP16)
        set(MARCH "-march=armv8.2-a+fp16+dotprod")
      else()
        set(MARCH "-march=armv8.2-a+dotprod")
      endif()
    endif()
    if(MGE_WITH_CUDA)
      # The continuation lines stay at column 0 on purpose: any leading
      # whitespace on them would become part of the message text.
      message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
for save link time(14min->1min), you may open below flags if not deploy on\
arm a53 platform, or just build release type!")
      #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
  endif()
  # riscv64
  if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_64_BIT 1)
    set(MEGDNN_RISCV64 1)
  endif()

  # Apply the -march value selected by the architecture branches (if any).
  string(APPEND CMAKE_CXX_FLAGS " ${MARCH}")

  set(MGE_VERSION_SCRIPT
      ${PROJECT_SOURCE_DIR}/src/version.ld
      CACHE INTERNAL "Path to linker version script")

  # Write out megbrain_build_config.h
  # It defines macros needed by both megbrain and dnn
  configure_file(
    src/megbrain_build_config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

  add_subdirectory(dnn)
  # Generate megbrain/opr/param_defs.h from the operator parameter definitions
  # and expose it through the mgb_opr_param_defs INTERFACE library.
  set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
  list(APPEND MGB_OPR_PARAM_DEFS_SRCS
       ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
  list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
  list(APPEND MGB_OPR_PARAM_DEFS_OUTS
       ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)

  # The output directory has to exist before the generator script runs.
  file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

  # Build rule for the header; it is refreshed when the definitions or the
  # generator script change.
  add_custom_command(
    OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND
      ${PYTHON_EXECUTABLE} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS}
      ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM)

  # Helper target that owns the generated header; the INTERFACE library
  # depends on it and carries the include directories.
  add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
  add_library(mgb_opr_param_defs INTERFACE)
  add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
  target_include_directories(
    mgb_opr_param_defs
    INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
              $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>)

  install(FILES ${MGB_OPR_PARAM_DEFS_OUTS}
          DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
  install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
  # TableGen inputs, needed by the MLIR JIT backend and the imperative runtime.
  if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

    # Build the combined definition file at configure time: the dnn
    # definitions followed by the megbrain ones.
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    # Quote the contents: unquoted, the text is split at every ';' into
    # separate arguments and the semicolons are lost from the appended file.
    file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")
    # The combined file is only rebuilt when CMake runs, so re-run CMake
    # whenever one of its two inputs is edited.
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
      ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
      ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})

    # Generate param_defs.td through a real build rule so it is only
    # regenerated when its inputs change; param_defs_tblgen remains the target
    # other code depends on.
    add_custom_command(
      OUTPUT ${OPR_PARAM_DEFS_OUT}
      COMMAND ${PYTHON_EXECUTABLE} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
      DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
      VERBATIM
    )
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})

    # mlir tblgen sources
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    # Turn every include directory into a "-I<dir>" argument.
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
  endif()
  # Sub-projects. MegRay is only added for distributed builds and comes
  # before the core sources.
  if(MGE_WITH_DISTRIBUTED)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
  endif()

  add_subdirectory(src)

  # Optional components layered on top of the core library.
  if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
  endif()

  if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
  endif()

  # Tests are only added when RTTI is enabled as well.
  if(MGE_ENABLE_RTTI AND MGE_WITH_TEST)
    add_subdirectory(test)
  endif()
  # "develop" target: symlink the built extension module and the generated
  # version.py from the build tree into the source tree's Python package.
  # It only exists when the _imperative_rt target is defined.
  if(TARGET _imperative_rt)
    add_custom_target(
      develop
      COMMAND ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
      COMMAND ${CMAKE_COMMAND} -E create_symlink
        ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
      VERBATIM
    )
    # add_custom_target(DEPENDS ...) is documented for files and custom-command
    # outputs; a dependency on another target is declared with
    # add_dependencies().
    add_dependencies(develop _imperative_rt)
  endif()
  # Configure and install pkg-config.
  # Note that unlike the Config.cmake modules, this is not relocatable (and not
  # really portable) because we have two dependencies without pkg-config
  # descriptions: FlatBuffers and MKL-DNN
  if(MGE_USE_SYSTEM_MKLDNN)
    set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
  endif()
  if(MGE_USE_SYSTEM_OPENBLAS)
    string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
  endif()

  # Only @VAR@ references in the template are substituted.
  configure_file(
    cmake/megengine.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    @ONLY)
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
  # Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
  if(NOT MGE_WITH_DISTRIBUTED)
    set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)
    include(CMakePackageConfigHelpers)

    # Generate MegEngineConfig.cmake and the matching version file.
    configure_package_config_file(
      cmake/MegEngineConfig.cmake.in
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
      INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR})
    write_basic_package_version_file(
      ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
      COMPATIBILITY SameMajorVersion)

    # Install the exported targets next to the two package files.
    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(
      FILES
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
      DESTINATION ${MGE_INSTALL_CMAKEDIR})
  endif()
  # Windows: use the static runtime (/MT, /MTd) instead of the DLL runtime.
  if(WIN32 OR MSVC)
    add_compile_options(
      $<$<CONFIG:>:/MT>
      $<$<CONFIG:Debug>:/MTd>
      $<$<CONFIG:Release>:/MT>)

    # Rewrite every /MD occurrence in the cached C and C++ flag variables to
    # /MT and force the result back into the cache.
    foreach(flags_var
            CMAKE_C_FLAGS
            CMAKE_C_FLAGS_DEBUG
            CMAKE_C_FLAGS_RELEASE
            CMAKE_C_FLAGS_MINSIZEREL
            CMAKE_C_FLAGS_RELWITHDEBINFO
            CMAKE_CXX_FLAGS
            CMAKE_CXX_FLAGS_DEBUG
            CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_MINSIZEREL
            CMAKE_CXX_FLAGS_RELWITHDEBINFO)
      if(${flags_var} MATCHES "/MD")
        string(REPLACE "/MD" "/MT" ${flags_var} "${${flags_var}}")
        set(${flags_var} "${${flags_var}}" CACHE STRING "msvc compiler flags" FORCE)
        message(VERBOSE "MSVC flags: ${flags_var}:${${flags_var}}")
      endif()
    endforeach()
  endif()
  # Command line tools for the MLIR JIT backend.
  if(MGE_WITH_JIT_MLIR)
    foreach(mlir_tool mgb-opt mgb-file-check)
      add_subdirectory(tools/mlir/${mlir_tool})
    endforeach()
  endif()
  # Warn when cuDNN 8.0.0 or newer is linked statically: the user has to
  # switch to the shared cuDNN or restrict the generated architectures.
  # The example value for MGE_CUDA_GENCODE now repeats -gencode for each
  # architecture; the previous text listed the second architecture without it.
  if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED
     AND NOT "${CUDNN_VERSION}" VERSION_LESS "8.0.0")
    # The warning is printed three times, as before, so that it stands out.
    foreach(_mge_cudnn_warning_repeat RANGE 1 3)
      message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
    endforeach()
  endif()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台。