You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

CMakeLists.txt 22 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  # Top-level build script for MegEngine: project declaration, global C++
  # settings and the user-facing configuration switches.
  cmake_minimum_required(VERSION 3.9.0)
  project(MegEngine)

  # C++14, no compiler extensions, position-independent code for all targets.
  set(CMAKE_CXX_STANDARD 14)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)

  # Extra find/include modules shipped with the project.
  set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

  # Override the static-archive rules so that ar/ranlib receive the "D"
  # modifier (deterministic archives with GNU binutils). Skipped on MSVC,
  # Apple and Windows.
  if(NOT (MSVC OR APPLE OR WIN32))
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
  endif()

  # Probe whether the C++ compiler accepts -Wclass-memaccess; the result is
  # stored in CXX_SUPPORT_WCLASS_MEMACCESS.
  include(CheckCXXCompilerFlag)
  check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

  # Target architecture; AUTO is resolved later from CMAKE_SYSTEM_PROCESSOR
  # or from the cross-compiling toolchain.
  set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
  set_property(
    CACHE MGE_ARCH
    PROPERTY STRINGS
      AUTO
      x86_64
      i386
      armv7
      aarch64
      naive
      fallback
  )

  # JIT back-ends.
  option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
  option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON)

  # ARM feature switches and float16 support.
  option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
  option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF)
  option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)

  # CUDA / TensorRT.
  option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
  option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
  option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)

  # Third-party handling and serialization.
  option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
  option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
  # Cross compilation: a toolchain file was supplied, so MGE_ARCH is derived
  # from the toolchain (Android, iOS or a generic ARM cross build) instead of
  # from the host processor.
  if(CMAKE_TOOLCHAIN_FILE)
    message("We are cross compiling.")
    # flatc has to run on the build host, so point at the host-built binary.
    message("config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")

    # NOTE: every variable expansion compared below is quoted. Unquoted, an
    # empty or list-valued ANDROID_ARCH / IOS_ARCH turns the if() into a
    # syntax error instead of reaching the intended FATAL_ERROR branch.
    if(ANDROID_TOOLCHAIN_ROOT)
      # Prefer ANDROID_ARCH_NAME when the toolchain provides it.
      if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
        set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
      endif()
      if("${ANDROID_ARCH}" STREQUAL "arm")
        set(MGE_ARCH "armv7")
      elseif("${ANDROID_ARCH}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
      endif()
    elseif(IOS_TOOLCHAIN_ROOT)
      # 32-bit iOS variants map to armv7, 64-bit variants to aarch64.
      if("${IOS_ARCH}" STREQUAL "armv7"
         OR "${IOS_ARCH}" STREQUAL "armv7k"
         OR "${IOS_ARCH}" STREQUAL "armv7s")
        set(MGE_ARCH "armv7")
      elseif("${IOS_ARCH}" STREQUAL "arm64" OR "${IOS_ARCH}" STREQUAL "arm64e")
        set(MGE_ARCH "aarch64")
      else()
        message(FATAL_ERROR "Unsupported IOS_ARCH.")
      endif()
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
      # Generic ARM cross build: the caller names the architecture directly.
      set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
      message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
    message("CONFIG MGE_ARCH TO ${MGE_ARCH}")
  endif()
  # Resolve MGE_ARCH=AUTO from the processor CMake reports for the target.
  # Expansions are quoted so that an empty CMAKE_SYSTEM_PROCESSOR falls through
  # to the error branch instead of producing an if() syntax error.
  if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
      set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
      set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
      # Any other processor name starting with "arm" is treated as armv7.
      set(MGE_ARCH "armv7")
    else()
      # FATAL_ERROR (not "FATAL", which message() would print as plain text)
      # so that configuration actually stops on an unknown architecture.
      message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
  endif()
  # Global compiler flags. Windows builds get a clang-cl specific flag set;
  # every other platform gets -Wall/-Wextra plus per-config optimisation flags.
  if(MSVC OR WIN32)
    # add_definitions() is used instead of add_compile_definitions(): the
    # latter only exists since CMake 3.12, while this project declares 3.9.0
    # as its minimum version.
    add_definitions(-DNOMINMAX=1 -D_USE_MATH_DEFINES=1 -DWIN32=1)
    message("-- into windows build...")
    message(" -- CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    # clang-cl reports the compiler id "Clang"; anything else is rejected
    # because the flags below mix clang (-m..., -Wno-error=...) and cl (/D...)
    # spellings that only clang-cl accepts together.
    if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()
    # add flags for sse/avx for X86
    set(WIN_FLAGS "-msse4.2 -mavx -mavx2")
    # If your CPU is from the Cascade Lake series, the following can be
    # enabled for performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")
    # for windows build
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated -Wno-error=documentation -Wno-error=unreachable-code-break")
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601")
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")
    # The same flag set is applied to both C and C++ sources.
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    # Android release builds use -Ofast, all other platforms use -O3.
    if(ANDROID)
      set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG")
    else()
      set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
    endif()
  endif()
  # Link with GNU gold when the compiler accepts -fuse-ld=gold, except on
  # Android, Apple and Windows/MSVC builds.
  check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
  if(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    message("-- Using GNU gold linker.")
    string(APPEND CMAKE_C_FLAGS " -fuse-ld=gold")
    string(APPEND CMAKE_CXX_FLAGS " -fuse-ld=gold")
  endif()
  # Halide is only usable as part of the JIT, so it is switched off (with a
  # warning) when JIT support has been disabled.
  if(MGE_WITH_HALIDE AND NOT MGE_WITH_JIT)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()

  if(MGE_WITH_CUDA)
    # Enable the CUDA language; fail early when no CUDA compiler is found.
    include(CheckLanguage)
    check_language(CUDA)
    if(NOT CMAKE_CUDA_COMPILER)
      message(FATAL_ERROR "CUDA compiler not found in PATH")
    endif()
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
  else()
    # Without CUDA the JIT (including Halide) and TensorRT are turned off.
    message("-- Disable JIT support, as CUDA is not enabled.")
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_HALIDE OFF)
    message("-- Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
  endif()
  # A Python 3 interpreter is mandatory (it runs code generators later on).
  find_package(PythonInterp 3 REQUIRED)

  # Locate the thread library, preferring the -pthread compiler flag.
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads)

  # When threads are provided through "-pthread" and CUDA is enabled, the
  # flag is forwarded to the host compiler via -Xcompiler for CUDA sources
  # and passed directly for every other language.
  if(MGE_WITH_CUDA AND "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread")
    set_property(
      TARGET Threads::Threads
      PROPERTY INTERFACE_COMPILE_OPTIONS
        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>"
    )
  endif()
  # BLAS back-end selection (MKL or OpenBLAS).
  set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
  set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)

  # Optional user override of the nvcc -gencode flags; empty means the
  # defaults chosen in the CUDA section are used.
  set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")

  # Default the CUDA host compiler to the C++ compiler. CMake variable
  # references use ${...}; the former $(...) spelling is Makefile syntax and
  # stored the literal text "$(CMAKE_CXX_COMPILER)".
  # NOTE(review): this runs after enable_language(CUDA); confirm whether the
  # value is still picked up at that point or should be set earlier.
  if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
  endif()
  # Feature switches for RTTI, logging and debug utilities.
  option(MGE_ENABLE_RTTI "Build with RTTI" ON)
  option(MGE_ENABLE_LOGGING "Build with logging" ON)
  option(MGE_DEBUG_UTIL "Enable debug utility" ON)

  # Single-config generators default to RelWithDebInfo when the user did not
  # choose a build type; multi-config generators are left untouched.
  if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
  endif()

  if(NOT MGE_ENABLE_RTTI)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
  endif()

  option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
  if(NOT MGE_ENABLE_EXCEPTIONS)
    # The compiler flag is spelled -fno-exceptions (plural); the previous
    # "-fno-exception" is not a recognised GCC/Clang option.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
  endif()

  # Unit tests pull in googletest through the project's gtest module.
  option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
  if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
  endif()
  # Build-shape switches: distributed training, inference-only library and
  # the Python module.
  option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
  option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
  option(MGE_WITH_PYTHON_MODULE "Build MegEngine Python Module." ON)

  # Distributed support is only kept when CUDA is enabled.
  if(NOT MGE_WITH_CUDA)
    message("-- Disable distributed support, as CUDA is not enabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
  endif()

  # An inference-only build drops distributed support, the Python module and
  # the tests.
  if(MGE_INFERENCE_ONLY)
    message("-- Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)
    message("-- Disable python module for inference only build.")
    set(MGE_WITH_PYTHON_MODULE OFF)
    message("-- Disable tests for inference only build.")
    set(MGE_WITH_TEST OFF)
  endif()

  # Third-party dependencies required by the features that are still enabled.
  if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
  endif()

  if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
  endif()
  # CUDA configuration: include/link directories, nvcc flags, the -gencode
  # list and the set of CUDA libraries collected in MGE_CUDA_LIBS.
  if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

    # Add every implicit CUDA host link directory except those named "stubs".
    # The name is quoted so that an empty component cannot break the if().
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
      get_filename_component(_NAME ${path} NAME)
      if(NOT "${_NAME}" STREQUAL "stubs")
        list(APPEND CUDA_LINK_DIRECTORIES ${path})
      endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    # Per-configuration and common nvcc flags.
    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
    set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    if(NOT MGE_ENABLE_RTTI)
      string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
      # Host compiler flag is -fno-exceptions (plural); "-fno-exception" is
      # not a recognised GCC/Clang option.
      string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler -fno-exceptions")
    endif()

    # Default -gencode list, chosen by CUDA compiler version, unless the user
    # supplied MGE_CUDA_GENCODE. MEGDNN_THREADS_512 is 0 for the built-in
    # list and 1 when the user overrides it.
    if(NOT MGE_CUDA_GENCODE)
      if("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386")
        set(MEGDNN_THREADS_512 0)
        # _mge_cuda_sm_list: architectures compiled to SASS (code=sm_XX).
        # _mge_cuda_ptx:     architecture additionally emitted as PTX
        #                    (code=compute_XX).
        if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
          set(_mge_cuda_sm_list 52 60 61 70 75)
          set(_mge_cuda_ptx 75)
        elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
          set(_mge_cuda_sm_list 52 60 61 70)
          set(_mge_cuda_ptx 70)
        else()
          set(_mge_cuda_sm_list 35 52 60 61)
          set(_mge_cuda_ptx 61)
        endif()
        foreach(_mge_cuda_sm IN LISTS _mge_cuda_sm_list)
          string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_cuda_sm},code=sm_${_mge_cuda_sm}")
        endforeach()
        string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_cuda_ptx},code=compute_${_mge_cuda_ptx}")
      else()
        message(FATAL_ERROR "Unsupported CUDA host arch.")
      endif()
    else()
      set(MEGDNN_THREADS_512 1)
    endif()
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")

    include(cmake/cudnn.cmake)
    if(MGE_WITH_TRT)
      include(cmake/tensorrt.cmake)
    endif()

    if(MGE_CUDA_USE_STATIC)
      # cuDNN (and TensorRT, when enabled) are linked as whole archives.
      if(MGE_WITH_TRT)
        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libcudnn -Wl,--no-whole-archive)
      else()
        list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
      endif()
      list(APPEND MGE_CUDA_LIBS cusolver_static cublas_static curand_static culibos cudart_static cusparse_static)
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.1.0")
        list(APPEND MGE_CUDA_LIBS cublasLt_static)
      endif()
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
        # mark all symbols from liblapack_static.a as weak to avoid
        # duplicated definition with mkl
        find_library(
          LAPACK_STATIC_PATH lapack_static
          HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        if(NOT LAPACK_STATIC_PATH)
          message(FATAL_ERROR "liblapack_static.a not found")
        endif()
        set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)
        # add a target that run objcopy
        add_custom_command(
          OUTPUT ${LAPACK_STATIC_COPY_PATH}
          COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
          VERBATIM)
        add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})
        # create a library named "lapack_static_weak"
        add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
        add_dependencies(lapack_static_weak lapack_static_weak_target)
        set_target_properties(
          lapack_static_weak PROPERTIES
          IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
        list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
      endif()
    else()
      # Dynamic linking against the CUDA libraries.
      if(MGE_WITH_TRT)
        list(APPEND MGE_CUDA_LIBS libnvinfer)
      endif()
      list(APPEND MGE_CUDA_LIBS libcudnn)
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.1.0")
        list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
      endif()
    endif()

    add_subdirectory(dnn/cuda-stub)
    list(APPEND MGE_CUDA_LIBS nvrtc cuda-stub nvToolsExt)
    set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS}")
  endif()
  # Use ccache as compiler launcher when it is installed. The CUDA launcher
  # is only set for CMake 3.10 and newer.
  find_program(CCACHE_BIN ccache)
  if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
    if(MGE_WITH_CUDA AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10.0")
      message("-- Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
      set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
    endif()
  endif()
  # x86 targets pull in the selected BLAS implementation and record the
  # library to link in MGE_BLAS_LIBS.
  if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
    if(${MGE_BLAS} STREQUAL "MKL")
      include(cmake/mkl.cmake)
      set(MGE_BLAS_LIBS libmkl)
    elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
      include(cmake/OpenBLAS.cmake)
      set(MGE_BLAS_LIBS libopenblas)
    else()
      message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
  endif()

  # Intel MKL-DNN is only built for x86_64.
  option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
  if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
  endif()
  # The MGB_* / MEGDNN_* values below mirror the user options; presumably
  # they are substituted into megbrain_build_config.h by the configure_file()
  # call further down - confirm against the template.

  # RTTI: name mangling is enabled exactly when RTTI is disabled.
  if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_RTTI 1)
    set(MEGDNN_ENABLE_MANGLING 0)
  else()
    set(MEGDNN_ENABLE_RTTI 0)
    set(MEGDNN_ENABLE_MANGLING 1)
  endif()
  set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})

  # Logging: JSON support follows the logging switch as well.
  set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
  set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

  # Exceptions: tell the user what a build without exceptions implies.
  if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
  endif()
  set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
  set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
  # JIT: Halide is built as a static library and only when both the JIT and
  # Halide switches are still on.
  if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
  endif()
  set(MGB_JIT ${MGE_WITH_JIT})
  set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})

  # Threads: flagged as available when find_package(Threads) reported a
  # thread library or flag.
  if(CMAKE_THREAD_LIBS_INIT)
    set(MGB_HAVE_THREAD 1)
  endif()

  # Test builds use intra-op multi threads.
  if(MGE_WITH_TEST)
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
  endif()

  # CUDA availability, mirrored for both MegBrain and MegDNN.
  set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
  set(MGB_CUDA ${MGE_WITH_CUDA})
  # Debug info: assertion locations and debug utilities are enabled for
  # Debug and RelWithDebInfo builds only.
  # CMAKE_BUILD_TYPE is quoted because it is empty with multi-config
  # generators (no default is applied for them earlier in this file); an
  # unquoted empty expansion would make this if() a syntax error. In that
  # case the else() branch is taken.
  if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
  else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
  endif()

  # FIXME: remove this after imp DEBUG UTIL for windows
  if(MSVC OR WIN32)
    set(MGB_ENABLE_DEBUG_UTIL 0)
    message(" -- disable MGB_ENABLE_DEBUG_UTIL in windows build")
  endif()
  # TensorRT support follows the (possibly CUDA-adjusted) MGE_WITH_TRT value.
  set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

  # Distributed communication operators follow MGE_WITH_DISTRIBUTED.
  set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

  # Inference-only builds drop gradient support and select the slim serving
  # build; full builds do the opposite.
  if(NOT MGE_INFERENCE_ONLY)
    set(MGB_ENABLE_GRAD 1)
    set(MGB_BUILD_SLIM_SERVING 0)
  else()
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
  endif()
  # MGE_ARCH related flags: per-architecture MEGDNN_* values and compiler
  # flags. MARCH collects the -march option appended to CMAKE_CXX_FLAGS at
  # the end of this section.

  # Naive reference implementation.
  if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
    message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
  endif()

  # x86 (32 and 64 bit).
  if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)

    # Record which BLAS implementation backs the x86 kernels.
    if(MGE_BLAS STREQUAL "MKL")
      set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
      set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()

    # Word size specific values and the matching -m64/-m32 flag.
    if(MGE_ARCH STREQUAL "x86_64")
      set(MEGDNN_X86_64 1)
      set(MEGDNN_64_BIT 1)
      set(_mge_x86_word_flag "-m64")
    else()
      set(MEGDNN_X86_32 1)
      set(_mge_x86_word_flag "-m32")
    endif()

    # GCC-style flags are not passed to MSVC.
    if(NOT MSVC)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_mge_x86_word_flag} -msse4.2 -mfpmath=sse")
    endif()
  endif()

  # 32-bit ARM.
  if(MGE_ARCH STREQUAL "armv7")
    if(ANDROID)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is
    # not fully IEEE 754 compatible, so GCC does not turn on NEON
    # auto-vectorization by default.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsafe-math-optimizations")
    set(MARCH "-march=armv7-a")
    set(MEGDNN_ARMV7 1)
  endif()

  # 64-bit ARM, optionally with the armv8.2-a fp16 / dotprod extensions.
  if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")

    # Collect the requested armv8.2-a extensions as a "+ext" suffix.
    set(_mge_armv8_2_ext "")
    if(MGE_ARMV8_2_FEATURE_FP16)
      message("Enable fp16 feature support in armv8.2")
      if(NOT ${MGE_DISABLE_FLOAT16})
        set(MEGDNN_ENABLE_FP16_NEON 1)
      endif()
      string(APPEND _mge_armv8_2_ext "+fp16")
    endif()
    if(MGE_ARMV8_2_FEATURE_DOTPROD)
      message("Enable dotprod feature support in armv8.2")
      string(APPEND _mge_armv8_2_ext "+dotprod")
    endif()

    # Any requested extension switches the baseline to armv8.2-a.
    if(NOT "${_mge_armv8_2_ext}" STREQUAL "")
      set(MARCH "-march=armv8.2-a${_mge_armv8_2_ext}")
    endif()
  endif()

  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
  # Generate megbrain_build_config.h, which defines the macros needed by both
  # megbrain and dnn, and install it alongside the public headers.
  configure_file(
    src/megbrain_build_config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
  )
  install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
    DESTINATION include
  )

  add_subdirectory(dnn)

  # Operator parameter definitions: param_defs.h is generated by running
  # gen_param_defs.py on mgb_opr_param_defs.py with the Python interpreter
  # found earlier.
  list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
  set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
  file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

  # Build rule for the generated header; it is re-run whenever the generator
  # script or its input changes.
  add_custom_command(
    OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND
      ${PYTHON_EXECUTABLE} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS}
      ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
  )
  list(APPEND MGB_OPR_PARAM_DEFS_OUTS ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)
  install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION include/megbrain/opr/)
  list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})

  # _mgb_opr_param_defs drives the generation; the mgb_opr_param_defs
  # INTERFACE library exposes the include directory and depends on it.
  add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
  add_library(mgb_opr_param_defs INTERFACE)
  target_include_directories(mgb_opr_param_defs INTERFACE ${MGB_OPR_PARAM_DEFS_INC})
  add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
  # Sub-projects. MegRay is only added for distributed builds and is added
  # before the core sources.
  if(MGE_WITH_DISTRIBUTED)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
  endif()

  # Core library and the load-and-run SDK tool are always built.
  add_subdirectory(src)
  add_subdirectory(sdk/load-and-run)

  # Optional Python bindings.
  if(MGE_WITH_PYTHON_MODULE)
    add_subdirectory(python_module)
  endif()

  # Tests are only added when RTTI is enabled as well.
  if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
    add_subdirectory(test)
  endif()
  # "develop" target: once _mgb is built, symlink the built/generated Python
  # artifacts from the build tree into the source tree's
  # python_module/megengine/_internal directory.
  if(TARGET _mgb)
    set(_mge_internal_bin_dir ${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal)
    set(_mge_internal_src_dir ${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal)

    # One "create_symlink <build-tree path> <source-tree path>" command per
    # artifact, in the order listed.
    set(_mge_develop_commands)
    foreach(_mge_artifact
        $<TARGET_FILE_NAME:_mgb>
        mgb.py
        opr.py
        opr_param_defs.py
        include)
      list(APPEND _mge_develop_commands
        COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${_mge_internal_bin_dir}/${_mge_artifact}
          ${_mge_internal_src_dir}/${_mge_artifact})
    endforeach()

    add_custom_target(
      develop
      ${_mge_develop_commands}
      DEPENDS _mgb
      VERBATIM
    )
  endif()
  # On Apple platforms the thread-detection result variables are set by hand
  # to select pthreads.
  if(APPLE)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  endif()

MegEngine 安装包中集成了使用 GPU 运行代码所需的 CUDA 环境,不用区分 CPU 和 GPU 版。 如果想要运行 GPU 程序,请确保机器本身配有 GPU 硬件设备并安装好驱动。 如果你想体验在云端 GPU 算力平台进行深度学习开发的感觉,欢迎访问 MegStudio 平台