GitOrigin-RevId: d412108732
tags/v1.3.1
@@ -34,7 +34,6 @@ option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF) | |||
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF) | |||
option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF) | |||
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF) | |||
option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF) | |||
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) | |||
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) | |||
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) | |||
@@ -773,6 +772,14 @@ if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") | |||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | |||
endif() | |||
endif() | |||
# dotprod is not enabled by default on APPLE; cpuinfo has some problems on APPLE | |||
if(NOT APPLE) | |||
# Probe whether the C++ compiler accepts the armv8.2-a dot-product -march flag;
# the outcome is stored in CXX_COMPILER_SUPPORT_DOT.
CHECK_CXX_COMPILER_FLAG("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT) | |||
if(CXX_COMPILER_SUPPORT_DOT) | |||
message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT") | |||
# Only a CMake variable is set here; presumably it is forwarded to the C++
# sources as the MGB_ENABLE_DOT macro elsewhere - TODO confirm.
set(MGB_ENABLE_DOT 1) | |||
endif() | |||
endif() | |||
if(MGE_ARCH STREQUAL "armv7") | |||
# -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default. | |||
@@ -797,15 +804,6 @@ if(MGE_ARCH STREQUAL "aarch64") | |||
set(MARCH "-march=armv8.2-a+fp16") | |||
endif() | |||
if(MGE_ARMV8_2_FEATURE_DOTPROD) | |||
message(STATUS "Enable dotprod feature support in armv8.2") | |||
if(MGE_ARMV8_2_FEATURE_FP16) | |||
set(MARCH "-march=armv8.2-a+fp16+dotprod") | |||
else() | |||
set(MARCH "-march=armv8.2-a+dotprod") | |||
endif() | |||
endif() | |||
if(MGE_WITH_CUDA) | |||
message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\ | |||
when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\ | |||
@@ -10,6 +10,10 @@ | |||
* implied. | |||
*/ | |||
#pragma once | |||
#include "megdnn/arch.h" | |||
#include "src/common/unroll_macro.h" | |||
#if MGB_ENABLE_DOT | |||
#if defined(__ARM_FEATURE_DOTPROD) | |||
#undef __ARM_FEATURE_DOTPROD | |||
@@ -17,8 +21,6 @@ | |||
#define __ARM_FEATURE_DOTPROD 1 | |||
#endif | |||
#include <arm_neon.h> | |||
#include "megdnn/arch.h" | |||
#include "src/common/unroll_macro.h" | |||
// GCC does not support __nodebug__, it reports: | |||
// '__nodebug__' attribute directive ignored | |||
@@ -4,7 +4,6 @@ set -e | |||
ARCHS=("arm64-v8a" "armeabi-v7a") | |||
BUILD_TYPE=Release | |||
MGE_ARMV8_2_FEATURE_FP16=OFF | |||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
MGE_DISABLE_FLOAT16=OFF | |||
ARCH=arm64-v8a | |||
REMOVE_OLD_BUILD=false | |||
@@ -15,7 +14,6 @@ function usage() { | |||
echo "available args detail:" | |||
echo "-d : Build with Debug mode, default Release mode" | |||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
echo "-a : config build arch available: ${ARCHS[@]}" | |||
echo "-r : remove old build dir before make, default off" | |||
@@ -25,7 +23,7 @@ function usage() { | |||
exit -1 | |||
} | |||
while getopts "rkhdfpa:" arg | |||
while getopts "rkhdfa:" arg | |||
do | |||
case $arg in | |||
d) | |||
@@ -36,10 +34,6 @@ do | |||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
MGE_ARMV8_2_FEATURE_FP16=ON | |||
;; | |||
p) | |||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
;; | |||
k) | |||
echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
MGE_DISABLE_FLOAT16=ON | |||
@@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||
echo "build config summary:" | |||
echo "BUILD_TYPE: $BUILD_TYPE" | |||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
echo "ARCH: $ARCH" | |||
echo "----------------------------------------------------" | |||
@@ -129,7 +122,6 @@ function cmake_build() { | |||
-DMGE_INFERENCE_ONLY=ON \ | |||
-DMGE_WITH_CUDA=OFF \ | |||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
${EXTRA_CMAKE_ARGS} \ | |||
@@ -4,7 +4,6 @@ set -e | |||
ARCHS=("arm64" "armv7") | |||
BUILD_TYPE=Release | |||
MGE_ARMV8_2_FEATURE_FP16=OFF | |||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
MGE_DISABLE_FLOAT16=OFF | |||
ARCH=arm64 | |||
REMOVE_OLD_BUILD=false | |||
@@ -15,7 +14,6 @@ function usage() { | |||
echo "available args detail:" | |||
echo "-d : Build with Debug mode, default Release mode" | |||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
echo "-a : config build arch available: ${ARCHS[@]}" | |||
echo "-r : remove old build dir before make, default off" | |||
@@ -25,7 +23,7 @@ function usage() { | |||
exit -1 | |||
} | |||
while getopts "rkhdfpa:" arg | |||
while getopts "rkhdfa:" arg | |||
do | |||
case $arg in | |||
d) | |||
@@ -36,10 +34,6 @@ do | |||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
MGE_ARMV8_2_FEATURE_FP16=ON | |||
;; | |||
p) | |||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
;; | |||
k) | |||
echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
MGE_DISABLE_FLOAT16=ON | |||
@@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||
echo "build config summary:" | |||
echo "BUILD_TYPE: $BUILD_TYPE" | |||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
echo "ARCH: $ARCH" | |||
echo "----------------------------------------------------" | |||
@@ -126,7 +119,6 @@ function cmake_build() { | |||
-DPYTHON_EXECUTABLE=/usr/local/bin/python3 \ | |||
-DMGE_WITH_CUDA=OFF \ | |||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
${EXTRA_CMAKE_ARGS} \ | |||
@@ -5,7 +5,6 @@ ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp") | |||
BUILD_TYPE=Release | |||
MGE_WITH_CUDA=OFF | |||
MGE_ARMV8_2_FEATURE_FP16=OFF | |||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||
MGE_DISABLE_FLOAT16=OFF | |||
ARCH=arm64-v8a | |||
REMOVE_OLD_BUILD=false | |||
@@ -19,7 +18,6 @@ function usage() { | |||
echo "-d : Build with Debug mode, default Release mode" | |||
echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)" | |||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | |||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | |||
echo "-a : config build arch available: ${ARCHS[@]}" | |||
echo "-r : remove old build dir before make, default off" | |||
@@ -29,7 +27,7 @@ function usage() { | |||
exit -1 | |||
} | |||
while getopts "rkhdcfpa:" arg | |||
while getopts "rkhdcfa:" arg | |||
do | |||
case $arg in | |||
d) | |||
@@ -44,10 +42,6 @@ do | |||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | |||
MGE_ARMV8_2_FEATURE_FP16=ON | |||
;; | |||
p) | |||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||
;; | |||
k) | |||
echo "open MGE_DISABLE_FLOAT16 for NEON" | |||
MGE_DISABLE_FLOAT16=ON | |||
@@ -87,7 +81,6 @@ echo "build config summary:" | |||
echo "BUILD_TYPE: $BUILD_TYPE" | |||
echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | |||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | |||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | |||
echo "ARCH: $ARCH" | |||
echo "----------------------------------------------------" | |||
@@ -147,7 +140,6 @@ function cmake_build() { | |||
-DMGE_INFERENCE_ONLY=ON \ | |||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | |||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | |||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | |||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | |||
${EXTRA_CMAKE_ARGS} \ | |||
@@ -93,11 +93,6 @@ | |||
#define MGB_ENABLE_CPUINFO 1 | |||
#endif | |||
#ifdef IOS | |||
#undef MGB_ENABLE_CPUINFO | |||
#define MGB_ENABLE_CPUINFO 0 | |||
#endif | |||
//! use one macro to indicate whether ARM dotprod is enabled
#if __ARM_FEATURE_DOTPROD | |||
#ifdef MGB_ENABLE_DOT | |||
@@ -117,7 +112,12 @@ | |||
#endif | |||
#endif | |||
//! cpuinfo and dotprod are disabled on iOS; cpuinfo has some problems on iOS
#ifdef IOS | |||
#undef MGB_ENABLE_CPUINFO | |||
#define MGB_ENABLE_CPUINFO 0 | |||
#undef MGB_ENABLE_DOT | |||
#endif | |||
// whether to include actual class name in mgb::Typeinfo object; if this is | |||
// disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. | |||