GitOrigin-RevId: a9813a44e4
tags/v0.4.0
@@ -177,7 +177,7 @@ if(MGE_WITH_CUDA) | |||||
if(NOT MGE_CUDA_GENCODE) | if(NOT MGE_CUDA_GENCODE) | ||||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | ||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=0") | |||||
set(MEGDNN_THREADS_512 0) | |||||
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") | if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") | ||||
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52") | set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52") | ||||
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60") | set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60") | ||||
@@ -202,7 +202,7 @@ if(MGE_WITH_CUDA) | |||||
message(FATAL_ERROR "Unsupported CUDA host arch.") | message(FATAL_ERROR "Unsupported CUDA host arch.") | ||||
endif() | endif() | ||||
else() | else() | ||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=1") | |||||
set(MEGDNN_THREADS_512 1) | |||||
endif() | endif() | ||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}") | set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}") | ||||
@@ -287,35 +287,31 @@ option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON) | |||||
# MKLDNN build | # MKLDNN build | ||||
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | ||||
add_definitions(-DMEGDNN_X86_WITH_MKL_DNN) | |||||
include(cmake/MKL_DNN.cmake) | include(cmake/MKL_DNN.cmake) | ||||
set(MEGDNN_X86_WITH_MKL_DNN 1) | |||||
endif() | endif() | ||||
# RTTI | # RTTI | ||||
if(MGE_ENABLE_RTTI) | if(MGE_ENABLE_RTTI) | ||||
add_definitions(-DMEGDNN_ENABLE_MANGLING=0 -DMEGDNN_ENABLE_RTTI=1) | |||||
set(MEGDNN_ENABLE_MANGLING 0) | |||||
set(MEGDNN_ENABLE_RTTI 1) | |||||
else() | else() | ||||
add_definitions(-DMEGDNN_ENABLE_MANGLING=1 -DMEGDNN_ENABLE_RTTI=0) | |||||
set(MEGDNN_ENABLE_MANGLING 1) | |||||
set(MEGDNN_ENABLE_RTTI 0) | |||||
endif() | endif() | ||||
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI}) | set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI}) | ||||
# Logging | # Logging | ||||
if(MGE_ENABLE_LOGGING) | |||||
add_definitions(-DMEGDNN_ENABLE_LOGGING=1) | |||||
else() | |||||
add_definitions(-DMEGDNN_ENABLE_LOGGING=0) | |||||
endif() | |||||
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING}) | set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING}) | ||||
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING}) | |||||
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING}) | set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING}) | ||||
# Exception | # Exception | ||||
if(MGE_ENABLE_EXCEPTIONS) | |||||
add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=1) | |||||
else() | |||||
if(NOT MGE_ENABLE_EXCEPTIONS) | |||||
message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.") | message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.") | ||||
add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=0) | |||||
endif() | endif() | ||||
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS}) | set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS}) | ||||
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS}) | |||||
# JIT | # JIT | ||||
if(MGE_WITH_JIT AND MGE_WITH_HALIDE) | if(MGE_WITH_JIT AND MGE_WITH_HALIDE) | ||||
@@ -330,8 +326,15 @@ if(CMAKE_THREAD_LIBS_INIT) | |||||
set(MGB_HAVE_THREAD 1) | set(MGB_HAVE_THREAD 1) | ||||
endif() | endif() | ||||
if(MGE_WITH_TEST) | |||||
# use intra-op multi threads | |||||
set(MEGDNN_ENABLE_MULTI_THREADS 1) | |||||
endif() | |||||
# CUDA | # CUDA | ||||
set(MGB_CUDA ${MGE_WITH_CUDA}) | set(MGB_CUDA ${MGE_WITH_CUDA}) | ||||
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA}) | |||||
# Debug info | # Debug info | ||||
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") | if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") | ||||
@@ -357,8 +360,46 @@ endif() | |||||
# Distributed communication | # Distributed communication | ||||
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED}) | set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED}) | ||||
# MGE_ARCH related flags | |||||
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") | |||||
if(MGE_BLAS STREQUAL "MKL") | |||||
set(MEGDNN_X86_WITH_MKL 1) | |||||
elseif(MGE_BLAS STREQUAL "OpenBLAS") | |||||
set(MEGDNN_X86_WITH_OPENBLAS 1) | |||||
endif() | |||||
endif() | |||||
# Enable Naive | |||||
if(MGE_ARCH STREQUAL "naive") | |||||
set(MEGDNN_NAIVE 1) | |||||
message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.") | |||||
endif() | |||||
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386") | |||||
set(MEGDNN_X86 1) | |||||
if(MGE_ARCH STREQUAL "x86_64") | |||||
set(MEGDNN_X86_64 1) | |||||
set(MEGDNN_64_BIT 1) | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") | |||||
endif() | |||||
else() | |||||
set(MEGDNN_X86_32 1) | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") | |||||
endif() | |||||
endif() | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | |||||
endif() | |||||
endif() | |||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}") | |||||
# Write out megbrain_build_config.h | # Write out megbrain_build_config.h | ||||
configure_file(src/core/include/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h) | |||||
# It defines macros needed by both megbrain and dnn | |||||
configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h) | |||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION include) | install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION include) | ||||
add_subdirectory(dnn) | add_subdirectory(dnn) | ||||
@@ -1,40 +1,3 @@ | |||||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
if(${MGE_BLAS} STREQUAL "MKL") | |||||
add_definitions(-DMEGDNN_X86_WITH_MKL) | |||||
elseif(${MGE_BLAS} STREQUAL "OpenBLAS") | |||||
add_definitions(-DMEGDNN_X86_WITH_OPENBLAS) | |||||
endif() | |||||
endif() | |||||
# Enable Naive | |||||
if(${MGE_ARCH} STREQUAL "naive") | |||||
add_definitions(-DMEGDNN_NAIVE=1) | |||||
message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.") | |||||
else() | |||||
add_definitions(-DMEGDNN_NAIVE=0) | |||||
endif() | |||||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
add_definitions(-DMEGDNN_X86=1) | |||||
if(${MGE_ARCH} STREQUAL "x86_64") | |||||
add_definitions(-DMEGDNN_X86_64 -DMEGDNN_64_BIT) | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") | |||||
endif() | |||||
else() | |||||
add_definitions(-DMEGDNN_X86_32) | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") | |||||
endif() | |||||
endif() | |||||
if(NOT MSVC) | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse") | |||||
endif() | |||||
endif() | |||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}") | |||||
list(APPEND OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/scripts/opr_param_defs.py) | list(APPEND OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/scripts/opr_param_defs.py) | ||||
set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) | set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) | ||||
@@ -89,8 +52,6 @@ add_dependencies(opr_param_defs _opr_param_defs) | |||||
if(MGE_WITH_TEST) | if(MGE_WITH_TEST) | ||||
# use multi threads | |||||
add_definitions (-DMEGDNN_ENABLE_MULTI_THREADS=1) | |||||
add_subdirectory(test) | add_subdirectory(test) | ||||
endif() | endif() | ||||
@@ -9,22 +9,10 @@ | |||||
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
*/ | */ | ||||
#include "megbrain_build_config.h" | |||||
#if !defined(__CUDACC__) | #if !defined(__CUDACC__) | ||||
// Try to detect if no architecture flags defined. | |||||
#if !defined(MEGDNN_NAIVE) && !defined(MEGDNN_X86) && \ | |||||
!defined(MEGDNN_X86_64) && !defined(MEGDNN_X86_32) && \ | |||||
!defined(MEGDNN_64_BIT) && !defined(MEGDNN_MIPS) && \ | |||||
!defined(MEGDNN_ARMV7) && !defined(MEGDNN_AARCH64) | |||||
#if defined(__x86_64__) || defined(_M_X64) | |||||
#define MEGDNN_X86 1 | |||||
#define MEGDNN_X86_64 1 | |||||
#define MEGDNN_64_BIT 1 | |||||
#elif defined(__i386) || defined(_M_IX86) | |||||
#define MEGDNN_X86 1 | |||||
#define MEGDNN_X86_32 1 | |||||
#endif | |||||
#endif | |||||
#endif // !defined(__CUDACC__) | #endif // !defined(__CUDACC__) | ||||
@@ -1,7 +1,9 @@ | |||||
set(LIBMEGDNN_DEF) | set(LIBMEGDNN_DEF) | ||||
file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) | file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) | ||||
# Build configure | |||||
list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | |||||
if(NOT ${MGE_ARCH} STREQUAL "naive") | if(NOT ${MGE_ARCH} STREQUAL "naive") | ||||
file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | ||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
@@ -24,7 +26,6 @@ if(MGE_WITH_CUDA) | |||||
file(GLOB_RECURSE CUSOURCES cuda/*.cu) | file(GLOB_RECURSE CUSOURCES cuda/*.cu) | ||||
list(APPEND SOURCES ${CUSOURCES}) | list(APPEND SOURCES ${CUSOURCES}) | ||||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_CUDA=1) | |||||
endif() | endif() | ||||
@@ -33,7 +34,7 @@ add_definitions(${LIBMEGDNN_DEF}) | |||||
add_library(megdnn EXCLUDE_FROM_ALL STATIC ${SOURCES}) | add_library(megdnn EXCLUDE_FROM_ALL STATIC ${SOURCES}) | ||||
target_link_libraries(megdnn opr_param_defs) | target_link_libraries(megdnn opr_param_defs) | ||||
target_include_directories(megdnn PUBLIC ${PROJECT_SOURCE_DIR}/dnn/include) | |||||
target_include_directories(megdnn PUBLIC ${PROJECT_BINARY_DIR}/genfiles ${PROJECT_SOURCE_DIR}/dnn/include) | |||||
target_include_directories(megdnn PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) | target_include_directories(megdnn PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) | ||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*") | install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*") | ||||
@@ -645,7 +645,7 @@ void ConvBiasImpl::AlgoMatrixMul::kimpl(const NCBKernParam& param, | |||||
} | } | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
static inline void mkldnn_fp32_conv_instance( | static inline void mkldnn_fp32_conv_instance( | ||||
const ConvBiasImpl::NCBKernParam& param, const uint32_t ocpg, | const ConvBiasImpl::NCBKernParam& param, const uint32_t ocpg, | ||||
const uint32_t icpg, const uint32_t group, const uint32_t in, | const uint32_t icpg, const uint32_t group, const uint32_t in, | ||||
@@ -186,7 +186,7 @@ public: | |||||
void* type() const override; | void* type() const override; | ||||
}; | }; | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
class ConvBiasImpl::AlgoMkldnnConv final : public AlgoBase { | class ConvBiasImpl::AlgoMkldnnConv final : public AlgoBase { | ||||
static void kern_mkldnn_fp32(const NCBKernParam& param, | static void kern_mkldnn_fp32(const NCBKernParam& param, | ||||
const NCBKernIndex&); | const NCBKernIndex&); | ||||
@@ -20,13 +20,13 @@ | |||||
#include "src/x86/conv_bias/postprocess_helper.h" | #include "src/x86/conv_bias/postprocess_helper.h" | ||||
#include "src/x86/handle.h" | #include "src/x86/handle.h" | ||||
#include "src/x86/utils.h" | #include "src/x86/utils.h" | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
#include <mkldnn.hpp> | #include <mkldnn.hpp> | ||||
#endif | #endif | ||||
#include <cstring> | #include <cstring> | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
using namespace dnnl; | using namespace dnnl; | ||||
#endif | #endif | ||||
using namespace megdnn; | using namespace megdnn; | ||||
@@ -161,7 +161,7 @@ ConvBiasImpl::AlgoDirectAvx2Stride1Int8::get_kimpls( | |||||
return direct_conv_avx2_stride1::get_kimpls(param, bundle); | return direct_conv_avx2_stride1::get_kimpls(param, bundle); | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, | bool ConvBiasImpl::AlgoMkldnnQint8::usable(FallbackConvBiasImpl*, | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
AlgoSelectionStrategy) const { | AlgoSelectionStrategy) const { | ||||
@@ -353,7 +353,7 @@ void ConvBiasImpl::AlgoMkldnnQint8::kern_mkldnn_s8x8x32( | |||||
#undef REORDER_MEMORY | #undef REORDER_MEMORY | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
/* ===================== mkldnn qint8 matmul algo ===================== */ | /* ===================== mkldnn qint8 matmul algo ===================== */ | ||||
bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, | bool ConvBiasImpl::AlgoMkldnnMatmulQint8::usable(FallbackConvBiasImpl*, | ||||
const NCBKernSizeParam& param, | const NCBKernSizeParam& param, | ||||
@@ -58,7 +58,7 @@ public: | |||||
void* type() const override; | void* type() const override; | ||||
}; | }; | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
/* ===================== mkldnn qint8 algo ===================== */ | /* ===================== mkldnn qint8 algo ===================== */ | ||||
class ConvBiasImpl::AlgoMkldnnQint8 final : public AlgoBase { | class ConvBiasImpl::AlgoMkldnnQint8 final : public AlgoBase { | ||||
static void kern_mkldnn_s8x8x32(const NCBKernParam& param, | static void kern_mkldnn_s8x8x32(const NCBKernParam& param, | ||||
@@ -25,7 +25,7 @@ namespace { | |||||
uint8_t x86_algo_type_storage; | uint8_t x86_algo_type_storage; | ||||
void* x86_algo_type = &x86_algo_type_storage; | void* x86_algo_type = &x86_algo_type_storage; | ||||
} // anonymous namespace | } // anonymous namespace | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
void* ConvBiasImpl::AlgoMkldnnQint8::type() const { | void* ConvBiasImpl::AlgoMkldnnQint8::type() const { | ||||
return x86_algo_type; | return x86_algo_type; | ||||
} | } | ||||
@@ -78,7 +78,7 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { | |||||
AlgoAVX2DirectConvStride2 avx2_stride2_direct; | AlgoAVX2DirectConvStride2 avx2_stride2_direct; | ||||
AlgoChanWiseAvx2Stride1Qint8 avx2_stride1_chanwsie_qint8; | AlgoChanWiseAvx2Stride1Qint8 avx2_stride1_chanwsie_qint8; | ||||
AlgoMatrixMul matmul; | AlgoMatrixMul matmul; | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
AlgoMkldnnMatmulQint8 mkldnn_matmul_qint8; | AlgoMkldnnMatmulQint8 mkldnn_matmul_qint8; | ||||
//! Because the mkldnnconv need handle | //! Because the mkldnnconv need handle | ||||
AlgoMkldnnQint8 mkldnn_qint8; | AlgoMkldnnQint8 mkldnn_qint8; | ||||
@@ -87,7 +87,7 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj { | |||||
SmallVector<std::unique_ptr<AlgoBase>> refhold; | SmallVector<std::unique_ptr<AlgoBase>> refhold; | ||||
public: | public: | ||||
AlgoPack() { | AlgoPack() { | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
//! Create the mkldnn algo | //! Create the mkldnn algo | ||||
all_algos.emplace_back(&mkldnn_conv_fp32); | all_algos.emplace_back(&mkldnn_conv_fp32); | ||||
all_algos.emplace_back(&mkldnn_matmul_qint8); | all_algos.emplace_back(&mkldnn_matmul_qint8); | ||||
@@ -32,7 +32,7 @@ public: | |||||
class AlgoDirectAvx2Stride1Int8; | class AlgoDirectAvx2Stride1Int8; | ||||
class AlgoAVX2DirectConvStride2; | class AlgoAVX2DirectConvStride2; | ||||
class AlgoChanWiseAvx2Stride1Qint8; | class AlgoChanWiseAvx2Stride1Qint8; | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
class AlgoMkldnnConv; | class AlgoMkldnnConv; | ||||
class AlgoMkldnnQint8; | class AlgoMkldnnQint8; | ||||
class AlgoMkldnnMatmulQint8; | class AlgoMkldnnMatmulQint8; | ||||
@@ -32,7 +32,7 @@ | |||||
#include "src/x86/warp_affine/opr_impl.h" | #include "src/x86/warp_affine/opr_impl.h" | ||||
#include "src/x86/warp_perspective/opr_impl.h" | #include "src/x86/warp_perspective/opr_impl.h" | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
#include <mkl.h> | #include <mkl.h> | ||||
#define STR_HELPER(x) #x | #define STR_HELPER(x) #x | ||||
@@ -57,11 +57,11 @@ HandleImpl::HandleImpl(megcoreComputingHandle_t computing_handle, | |||||
HandleType type) | HandleType type) | ||||
: fallback::HandleImpl::HandleImpl(computing_handle, type) { | : fallback::HandleImpl::HandleImpl(computing_handle, type) { | ||||
disable_denorm(); | disable_denorm(); | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
vmlSetMode(VML_LA | VML_FTZDAZ_ON | VML_ERRMODE_ERRNO); | vmlSetMode(VML_LA | VML_FTZDAZ_ON | VML_ERRMODE_ERRNO); | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
m_mkldnn_engine = dnnl::engine(dnnl::engine::kind::cpu, 0); | m_mkldnn_engine = dnnl::engine(dnnl::engine::kind::cpu, 0); | ||||
m_mkldnn_stream = dnnl::stream(m_mkldnn_engine); | m_mkldnn_stream = dnnl::stream(m_mkldnn_engine); | ||||
#endif | #endif | ||||
@@ -13,7 +13,7 @@ | |||||
#include "src/x86/profile.h" | #include "src/x86/profile.h" | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
#include <mkldnn.hpp> | #include <mkldnn.hpp> | ||||
#endif | #endif | ||||
@@ -31,14 +31,14 @@ public: | |||||
std::unique_ptr<Opr> create_operator(); | std::unique_ptr<Opr> create_operator(); | ||||
size_t alignment_requirement() const override; | size_t alignment_requirement() const override; | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
dnnl::engine mkldnn_engine() { return m_mkldnn_engine; } | dnnl::engine mkldnn_engine() { return m_mkldnn_engine; } | ||||
dnnl::stream mkldnn_stream() { return m_mkldnn_stream; } | dnnl::stream mkldnn_stream() { return m_mkldnn_stream; } | ||||
#endif | #endif | ||||
private: | private: | ||||
ProfileCache m_profile_cache = get_profile_cache(); | ProfileCache m_profile_cache = get_profile_cache(); | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
dnnl::engine m_mkldnn_engine; | dnnl::engine m_mkldnn_engine; | ||||
dnnl::stream m_mkldnn_stream; | dnnl::stream m_mkldnn_stream; | ||||
#endif | #endif | ||||
@@ -18,15 +18,15 @@ | |||||
#include "src/x86/matrix_mul/f32/strategy.h" | #include "src/x86/matrix_mul/f32/strategy.h" | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
#include <mkl.h> | #include <mkl.h> | ||||
#include <mkl_cblas.h> | #include <mkl_cblas.h> | ||||
#elif defined(MEGDNN_X86_WITH_OPENBLAS) | |||||
#elif MEGDNN_X86_WITH_OPENBLAS | |||||
#include <cblas.h> | #include <cblas.h> | ||||
#else | #else | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
#include <mkldnn.h> | #include <mkldnn.h> | ||||
#endif | #endif | ||||
@@ -39,7 +39,7 @@ using namespace x86; | |||||
namespace { | namespace { | ||||
void f32_blas_kern(const MatrixMulImpl::KernParam& kern_param) { | void f32_blas_kern(const MatrixMulImpl::KernParam& kern_param) { | ||||
#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) | |||||
#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS | |||||
auto m = kern_param.M, n = kern_param.N, k = kern_param.K; | auto m = kern_param.M, n = kern_param.N, k = kern_param.K; | ||||
bool trA = kern_param.trA, trB = kern_param.trB; | bool trA = kern_param.trA, trB = kern_param.trB; | ||||
const auto Aptr = kern_param.A<dt_float32>(), | const auto Aptr = kern_param.A<dt_float32>(), | ||||
@@ -55,7 +55,7 @@ void f32_blas_kern(const MatrixMulImpl::KernParam& kern_param) { | |||||
#endif | #endif | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
void f32_blas_kern_only_packA(const MatrixMulImpl::KernParam& kern_param, | void f32_blas_kern_only_packA(const MatrixMulImpl::KernParam& kern_param, | ||||
const void* a_panel, const void* b_panel) { | const void* a_panel, const void* b_panel) { | ||||
MEGDNN_MARK_USED_VAR(b_panel); | MEGDNN_MARK_USED_VAR(b_panel); | ||||
@@ -75,7 +75,7 @@ void f32_blas_kern_only_packA(const MatrixMulImpl::KernParam& kern_param, | |||||
bool MatrixMulImpl::AlgoF32Blas::usable( | bool MatrixMulImpl::AlgoF32Blas::usable( | ||||
const KernSizeParam& kern_size_param) const { | const KernSizeParam& kern_size_param) const { | ||||
#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) | |||||
#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS | |||||
return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && | return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && | ||||
kern_size_param.format == param::MatrixMul::Format::DEFAULT && | kern_size_param.format == param::MatrixMul::Format::DEFAULT && | ||||
kern_size_param.B_type == kern_size_param.A_type && | kern_size_param.B_type == kern_size_param.A_type && | ||||
@@ -93,7 +93,7 @@ MatrixMulImpl::kern_t MatrixMulImpl::AlgoF32Blas::get_kern( | |||||
} | } | ||||
/* ===================== AlgoF32BlasPackA====================== */ | /* ===================== AlgoF32BlasPackA====================== */ | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
bool MatrixMulImpl::AlgoF32MKLPackA::usable( | bool MatrixMulImpl::AlgoF32MKLPackA::usable( | ||||
const KernSizeParam& kern_size_param) const { | const KernSizeParam& kern_size_param) const { | ||||
return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && | return kern_size_param.compute_mode == Param::ComputeMode::DEFAULT && | ||||
@@ -202,7 +202,7 @@ MEGDNN_REG_GEMM_FUNC_FOR_IM2COL_IMPL_DETAIL(AlgoInt8x8x32Vnni, | |||||
#endif | #endif | ||||
/* ===================== Int8 mkldnn algo ===================== */ | /* ===================== Int8 mkldnn algo ===================== */ | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
namespace { | namespace { | ||||
void int8x8x32_kern_mkldnn(const MatrixMulImpl::KernParam& kern_param) { | void int8x8x32_kern_mkldnn(const MatrixMulImpl::KernParam& kern_param) { | ||||
MEGDNN_MARK_USED_VAR(kern_param); | MEGDNN_MARK_USED_VAR(kern_param); | ||||
@@ -28,7 +28,7 @@ public: | |||||
PackMode packmode() const override { return PackMode::NO_PACK; } | PackMode packmode() const override { return PackMode::NO_PACK; } | ||||
}; | }; | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
class MatrixMulImpl::AlgoF32MKLPackA : public AlgoBase { | class MatrixMulImpl::AlgoF32MKLPackA : public AlgoBase { | ||||
public: | public: | ||||
bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
@@ -106,7 +106,7 @@ public: | |||||
}; | }; | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
class MatrixMulImpl::AlgoInt8x8x32Mkldnn : public AlgoBase { | class MatrixMulImpl::AlgoInt8x8x32Mkldnn : public AlgoBase { | ||||
public: | public: | ||||
bool is_reproducible() const override { return true; } | bool is_reproducible() const override { return true; } | ||||
@@ -25,13 +25,13 @@ void* const MatrixMulImpl::sm_x86_algo_type = &x86_algo_type_storage; | |||||
class MatrixMulImpl::AlgoPack : NonCopyableObj { | class MatrixMulImpl::AlgoPack : NonCopyableObj { | ||||
AlgoF32Blas f32blas; | AlgoF32Blas f32blas; | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
AlgoF32MKLPackA f32mkl_packa; | AlgoF32MKLPackA f32mkl_packa; | ||||
#endif | #endif | ||||
#if MEGDNN_X86_WITH_VNNI | #if MEGDNN_X86_WITH_VNNI | ||||
AlgoInt8x8x32Vnni algoint8x8x32vnni; | AlgoInt8x8x32Vnni algoint8x8x32vnni; | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
AlgoInt8x8x32Mkldnn algoint8x8x32mkldnn; | AlgoInt8x8x32Mkldnn algoint8x8x32mkldnn; | ||||
#endif | #endif | ||||
AlgoInt8x8x32AVX2M4N16K2 algoint8x8x32avx2_m4n16k2; | AlgoInt8x8x32AVX2M4N16K2 algoint8x8x32avx2_m4n16k2; | ||||
@@ -42,7 +42,7 @@ class MatrixMulImpl::AlgoPack : NonCopyableObj { | |||||
public: | public: | ||||
AlgoPack() { | AlgoPack() { | ||||
if (is_supported(SIMDType::VNNI)) { | if (is_supported(SIMDType::VNNI)) { | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
all_algos.emplace_back(&algoint8x8x32mkldnn); | all_algos.emplace_back(&algoint8x8x32mkldnn); | ||||
#endif | #endif | ||||
#if MEGDNN_X86_WITH_VNNI | #if MEGDNN_X86_WITH_VNNI | ||||
@@ -53,11 +53,11 @@ public: | |||||
all_algos.emplace_back(&algoint8x8x32avx2_m2n4k16); | all_algos.emplace_back(&algoint8x8x32avx2_m2n4k16); | ||||
all_algos.emplace_back(&algoint8x8x32sse_m4n8k2); | all_algos.emplace_back(&algoint8x8x32sse_m4n8k2); | ||||
all_algos.emplace_back(&algof32mk8_8x8); | all_algos.emplace_back(&algof32mk8_8x8); | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
all_algos.emplace_back(&algoint8x8x32mkldnn); | all_algos.emplace_back(&algoint8x8x32mkldnn); | ||||
#endif | #endif | ||||
all_algos.emplace_back(&f32blas); | all_algos.emplace_back(&f32blas); | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
all_algos.emplace_back(&f32mkl_packa); | all_algos.emplace_back(&f32mkl_packa); | ||||
#endif | #endif | ||||
} | } | ||||
@@ -26,14 +26,14 @@ public: | |||||
protected: | protected: | ||||
static void* const sm_x86_algo_type; | static void* const sm_x86_algo_type; | ||||
class AlgoF32Blas; | class AlgoF32Blas; | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
class AlgoF32MKLPackA; | class AlgoF32MKLPackA; | ||||
#endif | #endif | ||||
#if MEGDNN_X86_WITH_VNNI | #if MEGDNN_X86_WITH_VNNI | ||||
class AlgoInt8x8x32Vnni; | class AlgoInt8x8x32Vnni; | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
class AlgoInt8x8x32Mkldnn; | class AlgoInt8x8x32Mkldnn; | ||||
#endif | #endif | ||||
@@ -17,7 +17,7 @@ | |||||
#include "src/x86/pooling/pooling_special_cases.h" | #include "src/x86/pooling/pooling_special_cases.h" | ||||
#include "src/x86/utils.h" | #include "src/x86/utils.h" | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
#include "mkldnn.hpp" | #include "mkldnn.hpp" | ||||
#endif | #endif | ||||
@@ -45,7 +45,7 @@ WorkspaceBundle get_bundle(const TensorLayout& src, const TensorLayout& dst, | |||||
return ws; | return ws; | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
template <dnnl::memory::format_tag format_tag, bool use_mkl_mem> | template <dnnl::memory::format_tag format_tag, bool use_mkl_mem> | ||||
dnnl::memory tensor_to_mkl_memory(_megdnn_tensor_in src, | dnnl::memory tensor_to_mkl_memory(_megdnn_tensor_in src, | ||||
const dnnl::engine& mkldnn_eng, | const dnnl::engine& mkldnn_eng, | ||||
@@ -164,7 +164,7 @@ void PoolingImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_out dst, | |||||
return; | return; | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
// Mkldnn provide optimized code for nhwc int8 pooling now. | // Mkldnn provide optimized code for nhwc int8 pooling now. | ||||
// Mkldnn can not change the layout automatic. | // Mkldnn can not change the layout automatic. | ||||
@@ -18,7 +18,7 @@ | |||||
#include <intrin.h> | #include <intrin.h> | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) | |||||
#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS | |||||
#include <pmmintrin.h> | #include <pmmintrin.h> | ||||
#endif | #endif | ||||
@@ -777,7 +777,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_INT8x8x32) { | |||||
.execs({arg.src, arg.filter, {}, {}, {}}); \ | .execs({arg.src, arg.filter, {}, {}, {}}); \ | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) { | if (megdnn::x86::is_supported(x86::SIMDType::VNNI)) { | ||||
cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); | cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); | ||||
} | } | ||||
@@ -846,14 +846,14 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32) { | |||||
{arg.src, arg.filter, arg.bias, {}, {}}); \ | {arg.src, arg.filter, arg.bias, {}, {}}); \ | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL) || defined(MEGDNN_X86_WITH_OPENBLAS) | |||||
#if MEGDNN_X86_WITH_MKL || MEGDNN_X86_WITH_OPENBLAS | |||||
cb("IM2COLMATMUL:X86_F32_BLAS"); | cb("IM2COLMATMUL:X86_F32_BLAS"); | ||||
#endif | #endif | ||||
#undef cb | #undef cb | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) { | TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_FP32_PACKA) { | ||||
using namespace conv_bias; | using namespace conv_bias; | ||||
std::vector<TestArg> args; | std::vector<TestArg> args; | ||||
@@ -973,7 +973,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_IM2COLMATMUL_QINT8) { | |||||
.execs({arg.src, arg.filter, {}, {}, {}}); \ | .execs({arg.src, arg.filter, {}, {}, {}}); \ | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
if (x86::is_supported(x86::SIMDType::VNNI)) { | if (x86::is_supported(x86::SIMDType::VNNI)) { | ||||
cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); | cb("IM2COLMATMUL:X86_INT8X8X32_MKLDNN"); | ||||
} | } | ||||
@@ -1057,7 +1057,7 @@ TEST_F(X86, CONV_BIAS_MATMUL) { | |||||
} | } | ||||
} | } | ||||
#if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
static void x86_benchmark_fp32_mkldnn(Handle* handle) { | static void x86_benchmark_fp32_mkldnn(Handle* handle) { | ||||
constexpr size_t RUNS = 30; | constexpr size_t RUNS = 30; | ||||
param::ConvBias param; | param::ConvBias param; | ||||
@@ -1304,7 +1304,7 @@ TEST_F(X86_MULTI_THREADS, CONV_BIAS_WINOGRAD_WEIGHT_PREPROCESS) { | |||||
} | } | ||||
/*********************************** End winograd ************************/ | /*********************************** End winograd ************************/ | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
static void x86_correctness_fp32_mkldnn_run( | static void x86_correctness_fp32_mkldnn_run( | ||||
Checker<ConvBias>& checker, UniformIntRNG& rng, Handle* handle, | Checker<ConvBias>& checker, UniformIntRNG& rng, Handle* handle, | ||||
ConvBiasForward::BiasMode bias_mode, | ConvBiasForward::BiasMode bias_mode, | ||||
@@ -20,7 +20,7 @@ | |||||
#include "test/common/workspace_wrapper.h" | #include "test/common/workspace_wrapper.h" | ||||
namespace { | namespace { | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
struct ConvArg { | struct ConvArg { | ||||
size_t batch_size, fh, sh, ph, ic, ih, iw, oc, groups; | size_t batch_size, fh, sh, ph, ic, ih, iw, oc, groups; | ||||
}; | }; | ||||
@@ -224,7 +224,7 @@ TEST_F(X86, DEFAULT_CONV_MATMUL) { | |||||
} | } | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
TEST_F(X86, CONVOLUTION_FORWARD_INT8) { | TEST_F(X86, CONVOLUTION_FORWARD_INT8) { | ||||
Checker<ConvolutionForward> checker(handle()); | Checker<ConvolutionForward> checker(handle()); | ||||
checker.set_before_exec_callback( | checker.set_before_exec_callback( | ||||
@@ -369,7 +369,7 @@ TEST_F(X86, CONVOLUTION_DIRECT_MKLDNN_C8) { | |||||
#endif | #endif | ||||
#if MEGDNN_WITH_BENCHMARK | #if MEGDNN_WITH_BENCHMARK | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
TEST_F(X86, BENCHMARK_CONVOLUTION_I8x8x32_MKLDNN) { | TEST_F(X86, BENCHMARK_CONVOLUTION_I8x8x32_MKLDNN) { | ||||
using namespace convolution; | using namespace convolution; | ||||
using Param = param::Convolution; | using Param = param::Convolution; | ||||
@@ -26,7 +26,7 @@ TEST_F(X86, MATRIX_MUL_VNNI_8X8X32) { | |||||
} | } | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
TEST_F(X86, MATRIX_MUL_MKLDNN_8X8X32) { | TEST_F(X86, MATRIX_MUL_MKLDNN_8X8X32) { | ||||
if (is_supported(SIMDType::VNNI)) { | if (is_supported(SIMDType::VNNI)) { | ||||
matrix_mul::check_matrix_mul(dtype::Int8{}, dtype::Int8{}, | matrix_mul::check_matrix_mul(dtype::Int8{}, dtype::Int8{}, | ||||
@@ -52,7 +52,7 @@ TEST_F(X86, MATRIX_MUL_SSE_8X8X32) { | |||||
handle(), "X86_INT8X8X32_SSE_4X8X2"); | handle(), "X86_INT8X8X32_SSE_4X8X2"); | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL) | |||||
#if MEGDNN_X86_WITH_MKL | |||||
TEST_F(X86, MATRIX_MUL_MKL_PACKA) { | TEST_F(X86, MATRIX_MUL_MKL_PACKA) { | ||||
matrix_mul::check_matrix_mul(dtype::Float32{}, dtype::Float32{}, | matrix_mul::check_matrix_mul(dtype::Float32{}, dtype::Float32{}, | ||||
dtype::Float32{}, handle(), | dtype::Float32{}, handle(), | ||||
@@ -93,7 +93,7 @@ TEST_F(X86, BENCHMARK_MATRIX_MUL_8X8X32) { | |||||
AlgoChecker<MatrixMul>("X86_INT8X8X32_VNNI")); | AlgoChecker<MatrixMul>("X86_INT8X8X32_VNNI")); | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
Benchmarker<MatrixMul> benchmarker_mkldnn(handle()); | Benchmarker<MatrixMul> benchmarker_mkldnn(handle()); | ||||
benchmarker_mkldnn.set_times(RUNS) | benchmarker_mkldnn.set_times(RUNS) | ||||
.set_dtype(0, dtype::Int8{}) | .set_dtype(0, dtype::Int8{}) | ||||
@@ -162,7 +162,7 @@ TEST_F(X86, BENCHMARK_MATRIX_MUL_8X8X32) { | |||||
} | } | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
if (is_supported(SIMDType::VNNI)) { | if (is_supported(SIMDType::VNNI)) { | ||||
auto mkldnn_used = | auto mkldnn_used = | ||||
benchmarker_mkldnn.exec({{M, K}, {K, N}, {}}) / RUNS; | benchmarker_mkldnn.exec({{M, K}, {K, N}, {}}) / RUNS; | ||||
@@ -24,7 +24,7 @@ TEST_F(X86, POOLING) { | |||||
} | } | ||||
} | } | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
TEST_F(X86, POOLING88) { | TEST_F(X86, POOLING88) { | ||||
Checker<Pooling> checker(handle()); | Checker<Pooling> checker(handle()); | ||||
auto args = pooling::get_args(); | auto args = pooling::get_args(); | ||||
@@ -105,7 +105,7 @@ TEST_F(X86_MULTI_THREADS, BENCHMARK_POOLING) { | |||||
test_x86_megdnn_pooling(handle()); | test_x86_megdnn_pooling(handle()); | ||||
} | } | ||||
#endif | #endif | ||||
#if defined(MEGDNN_X86_WITH_MKL_DNN) | |||||
#if MEGDNN_X86_WITH_MKL_DNN | |||||
TEST_F(X86, POOLING_INT8) { | TEST_F(X86, POOLING_INT8) { | ||||
auto args = pooling::get_args(); | auto args = pooling::get_args(); | ||||
for (auto&& arg : args) { | for (auto&& arg : args) { | ||||
@@ -0,0 +1,198 @@ | |||||
/** | |||||
* \file src/core/include/megbrain_build_config.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
*/ | |||||
#ifndef _HEADER_MGB_BUILD_CONFIG | |||||
#define _HEADER_MGB_BUILD_CONFIG | |||||
#cmakedefine01 MGB_CUDA | |||||
#cmakedefine01 MGB_ASSERT_LOC | |||||
#cmakedefine01 MGB_ENABLE_DEBUG_UTIL | |||||
#cmakedefine01 MGB_ENABLE_LOGGING | |||||
#cmakedefine01 MGB_ENABLE_GRAD | |||||
#cmakedefine01 MGB_VERBOSE_TYPEINFO_NAME | |||||
#cmakedefine01 MGB_BUILD_SLIM_SERVING | |||||
#cmakedefine01 MGB_ENABLE_EXCEPTION | |||||
#cmakedefine01 MGB_JIT | |||||
#cmakedefine01 MGB_JIT_HALIDE | |||||
#cmakedefine01 MGB_ENABLE_TENSOR_RT | |||||
#cmakedefine01 MGB_ENABLE_JSON | |||||
#cmakedefine01 MGB_HAVE_THREAD | |||||
#cmakedefine01 MGB_ENABLE_OPR_MM | |||||
#cmakedefine01 MEGDNN_ENABLE_MANGLING | |||||
// DNN related flags | |||||
// Platform macros | |||||
#cmakedefine01 MEGDNN_WITH_CUDA | |||||
#cmakedefine01 MEGDNN_X86_WITH_MKL | |||||
#cmakedefine01 MEGDNN_X86_WITH_OPENBLAS | |||||
#cmakedefine01 MEGDNN_X86_WITH_MKL_DNN | |||||
#cmakedefine01 MEGDNN_ENABLE_RTTI | |||||
#cmakedefine01 MEGDNN_ENABLE_LOGGING | |||||
#cmakedefine01 MEGDNN_ENABLE_LOGGING | |||||
#cmakedefine01 MEGDNN_ENABLE_EXCEPTIONS | |||||
#cmakedefine01 MEGDNN_NAIVE | |||||
#cmakedefine01 MEGDNN_X86 | |||||
#cmakedefine01 MEGDNN_X86_64 | |||||
#cmakedefine01 MEGDNN_64_BIT | |||||
#cmakedefine01 MEGDNN_THREADS_512 | |||||
#cmakedefine01 MEGDNN_ENABLE_MULTI_THREADS | |||||
// whether cuda is available | |||||
#ifndef MGB_CUDA | |||||
#define MGB_CUDA 1 | |||||
#endif | |||||
// whether to include file/line location for assert message | |||||
#ifndef MGB_ASSERT_LOC | |||||
#define MGB_ASSERT_LOC 1 | |||||
#endif | |||||
// whether to enable utils/debug.h and other debug methods | |||||
#ifndef MGB_ENABLE_DEBUG_UTIL | |||||
#define MGB_ENABLE_DEBUG_UTIL 1 | |||||
#endif | |||||
// whether to enable logging | |||||
#ifndef MGB_ENABLE_LOGGING | |||||
#define MGB_ENABLE_LOGGING 1 | |||||
#endif | |||||
// whether to enable registering opr grad functions | |||||
#ifndef MGB_ENABLE_GRAD | |||||
#define MGB_ENABLE_GRAD 1 | |||||
#endif | |||||
// whether to include actual class name in mgb::Typeinfo object; if this is | |||||
// disabled, mgb::serialization::OprRegistry::find_opr_by_name would not work. | |||||
#ifndef MGB_VERBOSE_TYPEINFO_NAME | |||||
#define MGB_VERBOSE_TYPEINFO_NAME 1 | |||||
#endif | |||||
// whether to enable configuring megbrain internals through env vars | |||||
#ifndef MGB_ENABLE_GETENV | |||||
#define MGB_ENABLE_GETENV 1 | |||||
#endif | |||||
// whether to remove unnecessary features when used for serving | |||||
#ifndef MGB_BUILD_SLIM_SERVING | |||||
#define MGB_BUILD_SLIM_SERVING 0 | |||||
#endif | |||||
// whether to enable exception | |||||
#ifndef MGB_ENABLE_EXCEPTION | |||||
#if __EXCEPTIONS | |||||
#define MGB_ENABLE_EXCEPTION 1 | |||||
#else | |||||
#define MGB_ENABLE_EXCEPTION 0 | |||||
#endif | |||||
#endif | |||||
// whether <thread> is available and usable | |||||
#ifndef MGB_HAVE_THREAD | |||||
#define MGB_HAVE_THREAD 1 | |||||
#endif | |||||
// whether to trade thread safety for memory usage | |||||
#ifndef MGB_THREAD_SAFE | |||||
#define MGB_THREAD_SAFE MGB_HAVE_THREAD | |||||
#endif | |||||
// whether to enable JIT | |||||
#ifndef MGB_JIT | |||||
#define MGB_JIT 1 | |||||
#endif | |||||
#ifndef MGB_JIT_HALIDE | |||||
#define MGB_JIT_HALIDE 0 | |||||
#endif | |||||
// whether to enable TensorRT support | |||||
#ifndef MGB_ENABLE_TENSOR_RT | |||||
#define MGB_ENABLE_TENSOR_RT MGB_CUDA | |||||
#endif | |||||
// whether to enable fastrun profile | |||||
#ifndef MGB_ENABLE_FASTRUN | |||||
#define MGB_ENABLE_FASTRUN 1 | |||||
#endif | |||||
/* ================= following are more fine-grained controls ================= */ | |||||
// whether to enable json dumper | |||||
#ifndef MGB_ENABLE_JSON | |||||
#define MGB_ENABLE_JSON !MGB_BUILD_SLIM_SERVING | |||||
#endif | |||||
// whether to enable distributed communication | |||||
#ifndef MGB_ENABLE_OPR_MM | |||||
#define MGB_ENABLE_OPR_MM 0 | |||||
#endif | |||||
/* ================= DNN related flags ================= */ | |||||
// whether to use mkl lib | |||||
#ifndef MEGDNN_X86_WITH_MKL | |||||
#define MEGDNN_X86_WITH_MKL 0 | |||||
#endif | |||||
// whether to enable rtti | |||||
#ifndef MEGDNN_ENABLE_RTTI | |||||
#define MEGDNN_ENABLE_RTTI 1 | |||||
#endif | |||||
// whether to enable mangling | |||||
#ifndef MEGDNN_ENABLE_MANGLING | |||||
#define MEGDNN_ENABLE_MANGLING !MEGDNN_ENABLE_RTTI | |||||
#endif | |||||
// whether to enable logging | |||||
#ifndef MEGDNN_ENABLE_LOGGING | |||||
#define MEGDNN_ENABLE_LOGGING MGB_ENABLE_LOGGING | |||||
#endif | |||||
// whether to enable exception | |||||
#ifndef MEGDNN_ENABLE_EXCEPTIONS | |||||
#define MEGDNN_ENABLE_EXCEPTIONS MGB_ENABLE_EXCEPTION | |||||
#endif | |||||
// whether to build naive | |||||
#ifndef MEGDNN_NAIVE | |||||
#define MEGDNN_NAIVE 0 | |||||
#endif | |||||
// whether to build x86 | |||||
#ifndef MEGDNN_X86 | |||||
#define MEGDNN_X86 0 | |||||
#endif | |||||
// whether to use cuda thread 512 | |||||
#ifndef MEGDNN_THREADS_512 | |||||
#define MEGDNN_THREADS_512 0 | |||||
#endif | |||||
// whether to enable intra-op multi threads | |||||
#ifndef MEGDNN_ENABLE_MULTI_THREADS | |||||
#define MEGDNN_ENABLE_MULTI_THREADS 1 | |||||
#endif | |||||
#ifndef MEGDNN_X86_WITH_OPENBLAS | |||||
#define MEGDNN_X86_WITH_OPENBLAS 0 | |||||
#endif | |||||
#ifndef MEGDNN_X86_WITH_MKL_DNN | |||||
#define MEGDNN_X86_WITH_MKL_DNN 0 | |||||
#endif | |||||
#endif // _HEADER_MGB_BUILD_CONFIG |