GitOrigin-RevId: d412108732
tags/v1.3.1
@@ -34,7 +34,6 @@ option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF) | |||||
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF) | option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF) | ||||
option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF) | option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF) | ||||
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF) | option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF) | ||||
option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF) | |||||
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) | option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF) | ||||
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) | option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON) | ||||
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) | option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON) | ||||
@@ -773,6 +772,14 @@ if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | ||||
endif() | endif() | ||||
endif() | endif() | ||||
# dotprod is not enable by default on APPLE, cpuinfo has some problem on APPLE | |||||
if(NOT APPLE) | |||||
CHECK_CXX_COMPILER_FLAG("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT) | |||||
if(CXX_COMPILER_SUPPORT_DOT) | |||||
message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT") | |||||
set(MGB_ENABLE_DOT 1) | |||||
endif() | |||||
endif() | |||||
if(MGE_ARCH STREQUAL "armv7") | if(MGE_ARCH STREQUAL "armv7") | ||||
# -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default. | # -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default. | ||||
@@ -797,15 +804,6 @@ if(MGE_ARCH STREQUAL "aarch64") | |||||
set(MARCH "-march=armv8.2-a+fp16") | set(MARCH "-march=armv8.2-a+fp16") | ||||
endif() | endif() | ||||
if(MGE_ARMV8_2_FEATURE_DOTPROD) | |||||
message(STATUS "Enable dotprod feature support in armv8.2") | |||||
if(MGE_ARMV8_2_FEATURE_FP16) | |||||
set(MARCH "-march=armv8.2-a+fp16+dotprod") | |||||
else() | |||||
set(MARCH "-march=armv8.2-a+dotprod") | |||||
endif() | |||||
endif() | |||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\ | message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\ | ||||
when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\ | when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\ | ||||
@@ -10,6 +10,10 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#pragma once | #pragma once | ||||
#include "megdnn/arch.h" | |||||
#include "src/common/unroll_macro.h" | |||||
#if MGB_ENABLE_DOT | #if MGB_ENABLE_DOT | ||||
#if defined(__ARM_FEATURE_DOTPROD) | #if defined(__ARM_FEATURE_DOTPROD) | ||||
#undef __ARM_FEATURE_DOTPROD | #undef __ARM_FEATURE_DOTPROD | ||||
@@ -17,8 +21,6 @@ | |||||
#define __ARM_FEATURE_DOTPROD 1 | #define __ARM_FEATURE_DOTPROD 1 | ||||
#endif | #endif | ||||
#include <arm_neon.h> | #include <arm_neon.h> | ||||
#include "megdnn/arch.h" | |||||
#include "src/common/unroll_macro.h" | |||||
// GCC does not support __nodebug__, it reports: | // GCC does not support __nodebug__, it reports: | ||||
// '__nodebug__' attribute directive ignored | // '__nodebug__' attribute directive ignored | ||||
@@ -4,7 +4,6 @@ set -e | |||||
ARCHS=("arm64-v8a" "armeabi-v7a") | ARCHS=("arm64-v8a" "armeabi-v7a") | ||||
BUILD_TYPE=Release | BUILD_TYPE=Release | ||||
MGE_ARMV8_2_FEATURE_FP16=OFF | MGE_ARMV8_2_FEATURE_FP16=OFF | ||||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||||
MGE_DISABLE_FLOAT16=OFF | MGE_DISABLE_FLOAT16=OFF | ||||
ARCH=arm64-v8a | ARCH=arm64-v8a | ||||
REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
@@ -15,7 +14,6 @@ function usage() { | |||||
echo "available args detail:" | echo "available args detail:" | ||||
echo "-d : Build with Debug mode, default Release mode" | echo "-d : Build with Debug mode, default Release mode" | ||||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | ||||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | ||||
echo "-a : config build arch available: ${ARCHS[@]}" | echo "-a : config build arch available: ${ARCHS[@]}" | ||||
echo "-r : remove old build dir before make, default off" | echo "-r : remove old build dir before make, default off" | ||||
@@ -25,7 +23,7 @@ function usage() { | |||||
exit -1 | exit -1 | ||||
} | } | ||||
while getopts "rkhdfpa:" arg | |||||
while getopts "rkhdfa:" arg | |||||
do | do | ||||
case $arg in | case $arg in | ||||
d) | d) | ||||
@@ -36,10 +34,6 @@ do | |||||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | ||||
MGE_ARMV8_2_FEATURE_FP16=ON | MGE_ARMV8_2_FEATURE_FP16=ON | ||||
;; | ;; | ||||
p) | |||||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||||
;; | |||||
k) | k) | ||||
echo "open MGE_DISABLE_FLOAT16 for NEON" | echo "open MGE_DISABLE_FLOAT16 for NEON" | ||||
MGE_DISABLE_FLOAT16=ON | MGE_DISABLE_FLOAT16=ON | ||||
@@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||||
echo "build config summary:" | echo "build config summary:" | ||||
echo "BUILD_TYPE: $BUILD_TYPE" | echo "BUILD_TYPE: $BUILD_TYPE" | ||||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | ||||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | ||||
echo "ARCH: $ARCH" | echo "ARCH: $ARCH" | ||||
echo "----------------------------------------------------" | echo "----------------------------------------------------" | ||||
@@ -129,7 +122,6 @@ function cmake_build() { | |||||
-DMGE_INFERENCE_ONLY=ON \ | -DMGE_INFERENCE_ONLY=ON \ | ||||
-DMGE_WITH_CUDA=OFF \ | -DMGE_WITH_CUDA=OFF \ | ||||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | ||||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | ||||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | ||||
${EXTRA_CMAKE_ARGS} \ | ${EXTRA_CMAKE_ARGS} \ | ||||
@@ -4,7 +4,6 @@ set -e | |||||
ARCHS=("arm64" "armv7") | ARCHS=("arm64" "armv7") | ||||
BUILD_TYPE=Release | BUILD_TYPE=Release | ||||
MGE_ARMV8_2_FEATURE_FP16=OFF | MGE_ARMV8_2_FEATURE_FP16=OFF | ||||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||||
MGE_DISABLE_FLOAT16=OFF | MGE_DISABLE_FLOAT16=OFF | ||||
ARCH=arm64 | ARCH=arm64 | ||||
REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
@@ -15,7 +14,6 @@ function usage() { | |||||
echo "available args detail:" | echo "available args detail:" | ||||
echo "-d : Build with Debug mode, default Release mode" | echo "-d : Build with Debug mode, default Release mode" | ||||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | ||||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | ||||
echo "-a : config build arch available: ${ARCHS[@]}" | echo "-a : config build arch available: ${ARCHS[@]}" | ||||
echo "-r : remove old build dir before make, default off" | echo "-r : remove old build dir before make, default off" | ||||
@@ -25,7 +23,7 @@ function usage() { | |||||
exit -1 | exit -1 | ||||
} | } | ||||
while getopts "rkhdfpa:" arg | |||||
while getopts "rkhdfa:" arg | |||||
do | do | ||||
case $arg in | case $arg in | ||||
d) | d) | ||||
@@ -36,10 +34,6 @@ do | |||||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | ||||
MGE_ARMV8_2_FEATURE_FP16=ON | MGE_ARMV8_2_FEATURE_FP16=ON | ||||
;; | ;; | ||||
p) | |||||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||||
;; | |||||
k) | k) | ||||
echo "open MGE_DISABLE_FLOAT16 for NEON" | echo "open MGE_DISABLE_FLOAT16 for NEON" | ||||
MGE_DISABLE_FLOAT16=ON | MGE_DISABLE_FLOAT16=ON | ||||
@@ -78,7 +72,6 @@ echo "----------------------------------------------------" | |||||
echo "build config summary:" | echo "build config summary:" | ||||
echo "BUILD_TYPE: $BUILD_TYPE" | echo "BUILD_TYPE: $BUILD_TYPE" | ||||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | ||||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | ||||
echo "ARCH: $ARCH" | echo "ARCH: $ARCH" | ||||
echo "----------------------------------------------------" | echo "----------------------------------------------------" | ||||
@@ -126,7 +119,6 @@ function cmake_build() { | |||||
-DPYTHON_EXECUTABLE=/usr/local/bin/python3 \ | -DPYTHON_EXECUTABLE=/usr/local/bin/python3 \ | ||||
-DMGE_WITH_CUDA=OFF \ | -DMGE_WITH_CUDA=OFF \ | ||||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | ||||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | ||||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | ||||
${EXTRA_CMAKE_ARGS} \ | ${EXTRA_CMAKE_ARGS} \ | ||||
@@ -5,7 +5,6 @@ ARCHS=("arm64-v8a" "armeabi-v7a-softfp" "armeabi-v7a-hardfp") | |||||
BUILD_TYPE=Release | BUILD_TYPE=Release | ||||
MGE_WITH_CUDA=OFF | MGE_WITH_CUDA=OFF | ||||
MGE_ARMV8_2_FEATURE_FP16=OFF | MGE_ARMV8_2_FEATURE_FP16=OFF | ||||
MGE_ARMV8_2_FEATURE_DOTPROD=OFF | |||||
MGE_DISABLE_FLOAT16=OFF | MGE_DISABLE_FLOAT16=OFF | ||||
ARCH=arm64-v8a | ARCH=arm64-v8a | ||||
REMOVE_OLD_BUILD=false | REMOVE_OLD_BUILD=false | ||||
@@ -19,7 +18,6 @@ function usage() { | |||||
echo "-d : Build with Debug mode, default Release mode" | echo "-d : Build with Debug mode, default Release mode" | ||||
echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)" | echo "-c : Build with CUDA, default without CUDA(for arm with cuda, example tx1)" | ||||
echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | echo "-f : enable MGE_ARMV8_2_FEATURE_FP16 for ARM64, need toolchain and hardware support" | ||||
echo "-p : enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64, need toolchain and hardware support" | |||||
echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | echo "-k : open MGE_DISABLE_FLOAT16 for NEON " | ||||
echo "-a : config build arch available: ${ARCHS[@]}" | echo "-a : config build arch available: ${ARCHS[@]}" | ||||
echo "-r : remove old build dir before make, default off" | echo "-r : remove old build dir before make, default off" | ||||
@@ -29,7 +27,7 @@ function usage() { | |||||
exit -1 | exit -1 | ||||
} | } | ||||
while getopts "rkhdcfpa:" arg | |||||
while getopts "rkhdcfa:" arg | |||||
do | do | ||||
case $arg in | case $arg in | ||||
d) | d) | ||||
@@ -44,10 +42,6 @@ do | |||||
echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | echo "enable MGE_ARMV8_2_FEATURE_FP16 for ARM64" | ||||
MGE_ARMV8_2_FEATURE_FP16=ON | MGE_ARMV8_2_FEATURE_FP16=ON | ||||
;; | ;; | ||||
p) | |||||
echo "enable MGE_ARMV8_2_FEATURE_DOTPROD for ARM64" | |||||
MGE_ARMV8_2_FEATURE_DOTPROD=ON | |||||
;; | |||||
k) | k) | ||||
echo "open MGE_DISABLE_FLOAT16 for NEON" | echo "open MGE_DISABLE_FLOAT16 for NEON" | ||||
MGE_DISABLE_FLOAT16=ON | MGE_DISABLE_FLOAT16=ON | ||||
@@ -87,7 +81,6 @@ echo "build config summary:" | |||||
echo "BUILD_TYPE: $BUILD_TYPE" | echo "BUILD_TYPE: $BUILD_TYPE" | ||||
echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | echo "MGE_WITH_CUDA: $MGE_WITH_CUDA" | ||||
echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | echo "MGE_ARMV8_2_FEATURE_FP16: $MGE_ARMV8_2_FEATURE_FP16" | ||||
echo "MGE_ARMV8_2_FEATURE_DOTPROD: $MGE_ARMV8_2_FEATURE_DOTPROD" | |||||
echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | echo "MGE_DISABLE_FLOAT16: $MGE_DISABLE_FLOAT16" | ||||
echo "ARCH: $ARCH" | echo "ARCH: $ARCH" | ||||
echo "----------------------------------------------------" | echo "----------------------------------------------------" | ||||
@@ -147,7 +140,6 @@ function cmake_build() { | |||||
-DMGE_INFERENCE_ONLY=ON \ | -DMGE_INFERENCE_ONLY=ON \ | ||||
-DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | -DMGE_WITH_CUDA=$MGE_WITH_CUDA \ | ||||
-DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | -DMGE_ARMV8_2_FEATURE_FP16= $MGE_ARMV8_2_FEATURE_FP16 \ | ||||
-DMGE_ARMV8_2_FEATURE_DOTPROD=$MGE_ARMV8_2_FEATURE_DOTPROD \ | |||||
-DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | -DMGE_DISABLE_FLOAT16=$MGE_DISABLE_FLOAT16 \ | ||||
-DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \ | ||||
${EXTRA_CMAKE_ARGS} \ | ${EXTRA_CMAKE_ARGS} \ | ||||
@@ -93,11 +93,6 @@ | |||||
#define MGB_ENABLE_CPUINFO 1 | #define MGB_ENABLE_CPUINFO 1 | ||||
#endif | #endif | ||||
#ifdef IOS | |||||
#undef MGB_ENABLE_CPUINFO | |||||
#define MGB_ENABLE_CPUINFO 0 | |||||
#endif | |||||
//! use one MACRO indicate enable_arm_dotprod | //! use one MACRO indicate enable_arm_dotprod | ||||
#if __ARM_FEATURE_DOTPROD | #if __ARM_FEATURE_DOTPROD | ||||
#ifdef MGB_ENABLE_DOT | #ifdef MGB_ENABLE_DOT | ||||
@@ -117,7 +112,12 @@ | |||||
#endif | #endif | ||||
#endif | #endif | ||||
//! IOS disabled cpuinfo and dotprod, cpuinfo has some problem on ios | |||||
#ifdef IOS | |||||
#undef MGB_ENABLE_CPUINFO | |||||
#define MGB_ENABLE_CPUINFO 0 | |||||
#undef MGB_ENABLE_DOT | |||||
#endif | |||||
// whether to include actual class name in mgb::Typeinfo object; if this is | // whether to include actual class name in mgb::Typeinfo object; if this is | ||||
// disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. | // disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. | ||||