@@ -29,6 +29,7 @@ jobs: | |||
uses: actions/checkout@v2 | |||
# Installs Ninja, then runs the third_party prepare and MKL install scripts. | |||
- name: Checkout submodules | |||
run: | | |||
# -y answers the confirmation prompt, which would otherwise abort a non-interactive job; | |||
# apt-get is used because it is the apt front end meant for scripts. | |||
apt-get update && apt-get install -y ninja-build | |||
./third_party/prepare.sh | |||
./third_party/install-mkl.sh | |||
- name: Build MegEngine | |||
@@ -57,6 +58,7 @@ jobs: | |||
uses: actions/checkout@v2 | |||
# Installs Ninja, then runs the third_party prepare and MKL install scripts. | |||
- name: Checkout submodules | |||
run: | | |||
# -y answers the confirmation prompt, which would otherwise abort a non-interactive job; | |||
# apt-get is used because it is the apt front end meant for scripts. | |||
apt-get update && apt-get install -y ninja-build | |||
./third_party/prepare.sh | |||
./third_party/install-mkl.sh | |||
- name: Build MegEngine | |||
@@ -27,7 +27,8 @@ function build() { | |||
-DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | |||
-DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | |||
-DMGE_WITH_TEST=ON \ | |||
-DCMAKE_BUILD_TYPE=RelWithDebInfo | |||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ | |||
-DMGE_WITH_CUSTOM_OP=ON | |||
make -j$(($(nproc) * 2)) -I ${build_dir} | |||
make develop | |||
popd >/dev/null | |||
@@ -1,59 +1,56 @@ | |||
# Copyright 2015 Google Inc. All rights reserved. | |||
# | |||
# Licensed under the Apache License, Version 2.0 (the "License"); | |||
# you may not use this file except in compliance with the License. | |||
# You may obtain a copy of the License at | |||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this | |||
# file except in compliance with the License. You may obtain a copy of the License at | |||
# | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# http://www.apache.org/licenses/LICENSE-2.0 | |||
# | |||
# Unless required by applicable law or agreed to in writing, software | |||
# distributed under the License is distributed on an "AS IS" BASIS, | |||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
# See the License for the specific language governing permissions and | |||
# limitations under the License. | |||
# Unless required by applicable law or agreed to in writing, software distributed under | |||
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | |||
# ANY KIND, either express or implied. See the License for the specific language | |||
# governing permissions and limitations under the License. | |||
# General function to create FlatBuffer build rules for the given list of | |||
# schemas. | |||
# General function to create FlatBuffer build rules for the given list of schemas. | |||
# | |||
# flatbuffers_schemas: A list of flatbuffer schema files to process. | |||
# | |||
# schema_include_dirs: A list of schema file include directories, which will be | |||
# passed to flatc via the -I parameter. | |||
# schema_include_dirs: A list of schema file include directories, which will be passed | |||
# to flatc via the -I parameter. | |||
# | |||
# custom_target_name: The generated files will be added as dependencies for a | |||
# new custom target with this name. You should add that target as a dependency | |||
# for your main target to ensure these files are built. You can also retrieve | |||
# various properties from this target, such as GENERATED_INCLUDES_DIR, | |||
# BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||
# custom_target_name: The generated files will be added as dependencies for a new custom | |||
# target with this name. You should add that target as a dependency for your main target | |||
# to ensure these files are built. You can also retrieve various properties from this | |||
# target, such as GENERATED_INCLUDES_DIR, BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||
# | |||
# additional_dependencies: A list of additional dependencies that you'd like | |||
# all generated files to depend on. Pass in a blank string if you have none. | |||
# additional_dependencies: A list of additional dependencies that you'd like all | |||
# generated files to depend on. Pass in a blank string if you have none. | |||
# | |||
# generated_includes_dir: Where to generate the C++ header files for these | |||
# schemas. The generated includes directory will automatically be added to | |||
# CMake's include directories, and will be where generated header files are | |||
# placed. This parameter is optional; pass in empty string if you don't want to | |||
# generate include files for these schemas. | |||
# generated_includes_dir: Where to generate the C++ header files for these schemas. The | |||
# generated includes directory will automatically be added to CMake's include | |||
# directories, and will be where generated header files are placed. This parameter is | |||
# optional; pass in empty string if you don't want to generate include files for these | |||
# schemas. | |||
# | |||
# binary_schemas_dir: If you specify an optional binary schema directory, binary | |||
# schemas will be generated for these schemas as well, and placed into the given | |||
# directory. | |||
# binary_schemas_dir: If you specify an optional binary schema directory, binary schemas | |||
# will be generated for these schemas as well, and placed into the given directory. | |||
# | |||
# copy_text_schemas_dir: If you want all text schemas (including schemas from | |||
# all schema include directories) copied into a directory (for example, if you | |||
# need them within your project to build JSON files), you can specify that | |||
# folder here. All text schemas will be copied to that folder. | |||
# copy_text_schemas_dir: If you want all text schemas (including schemas from all schema | |||
# include directories) copied into a directory (for example, if you need them within | |||
# your project to build JSON files), you can specify that folder here. All text schemas | |||
# will be copied to that folder. | |||
# | |||
# IMPORTANT: Make sure you quote all list arguments you pass to this function! | |||
# Otherwise CMake will only pass in the first element. | |||
# Example: build_flatbuffers("${fb_files}" "${include_dirs}" target_name ...) | |||
function(build_flatbuffers flatbuffers_schemas | |||
schema_include_dirs | |||
custom_target_name | |||
additional_dependencies | |||
generated_includes_dir | |||
binary_schemas_dir | |||
copy_text_schemas_dir) | |||
# IMPORTANT: Make sure you quote all list arguments you pass to this function! Otherwise | |||
# CMake will only pass in the first element. | |||
# | |||
# Example: build_flatbuffers("${fb_files}" "${include_dirs}" target_name ...) | |||
function( | |||
build_flatbuffers | |||
flatbuffers_schemas | |||
schema_include_dirs | |||
custom_target_name | |||
additional_dependencies | |||
generated_includes_dir | |||
binary_schemas_dir | |||
copy_text_schemas_dir) | |||
# Test if including from FindFlatBuffers | |||
if(FLATBUFFERS_FLATC_EXECUTABLE) | |||
@@ -65,10 +62,7 @@ function(build_flatbuffers flatbuffers_schemas | |||
endif() | |||
set(FLATC_SCHEMA_ARGS --gen-mutable) | |||
if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS) | |||
set(FLATC_SCHEMA_ARGS | |||
${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} | |||
${FLATC_SCHEMA_ARGS} | |||
) | |||
set(FLATC_SCHEMA_ARGS ${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} ${FLATC_SCHEMA_ARGS}) | |||
endif() | |||
set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}") | |||
@@ -77,12 +71,12 @@ function(build_flatbuffers flatbuffers_schemas | |||
# Generate the include files parameters. | |||
set(include_params "") | |||
set(all_generated_files "") | |||
foreach (include_dir ${schema_include_dirs}) | |||
foreach(include_dir ${schema_include_dirs}) | |||
set(include_params -I ${include_dir} ${include_params}) | |||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||
# Copy text schemas from dependent folders. | |||
file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob}) | |||
foreach (dependent_schema ${dependent_schemas}) | |||
foreach(dependent_schema ${dependent_schemas}) | |||
file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir}) | |||
endforeach() | |||
endif() | |||
@@ -91,62 +85,54 @@ function(build_flatbuffers flatbuffers_schemas | |||
foreach(schema ${flatbuffers_schemas}) | |||
get_filename_component(filename ${schema} NAME_WE) | |||
# For each schema, do the things we requested. | |||
if (NOT ${generated_includes_dir} STREQUAL "") | |||
if(NOT ${generated_includes_dir} STREQUAL "") | |||
set(generated_include ${generated_includes_dir}/${filename}_generated.h) | |||
add_custom_command( | |||
OUTPUT ${generated_include} | |||
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} | |||
-o ${generated_includes_dir} | |||
${include_params} | |||
-c ${schema} | |||
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} -o ${generated_includes_dir} | |||
${include_params} -c ${schema} | |||
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | |||
WORKING_DIRECTORY "${working_dir}") | |||
list(APPEND all_generated_files ${generated_include}) | |||
endif() | |||
if (NOT ${binary_schemas_dir} STREQUAL "") | |||
if(NOT ${binary_schemas_dir} STREQUAL "") | |||
set(binary_schema ${binary_schemas_dir}/${filename}.bfbs) | |||
add_custom_command( | |||
OUTPUT ${binary_schema} | |||
COMMAND ${FLATC} -b --schema | |||
-o ${binary_schemas_dir} | |||
${include_params} | |||
${schema} | |||
COMMAND ${FLATC} -b --schema -o ${binary_schemas_dir} ${include_params} | |||
${schema} | |||
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | |||
WORKING_DIRECTORY "${working_dir}") | |||
list(APPEND all_generated_files ${binary_schema}) | |||
endif() | |||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||
file(COPY ${schema} DESTINATION ${copy_text_schemas_dir}) | |||
endif() | |||
endforeach() | |||
# Create a custom target that depends on all the generated files. | |||
# This is the target that you can depend on to trigger all these | |||
# to be built. | |||
add_custom_target(${custom_target_name} | |||
DEPENDS ${all_generated_files} ${additional_dependencies}) | |||
# Create a custom target that depends on all the generated files. This is the target | |||
# that you can depend on to trigger all these to be built. | |||
add_custom_target(${custom_target_name} DEPENDS ${all_generated_files} | |||
${additional_dependencies}) | |||
# Register the include directory we are using. | |||
if (NOT ${generated_includes_dir} STREQUAL "") | |||
if(NOT ${generated_includes_dir} STREQUAL "") | |||
include_directories(${generated_includes_dir}) | |||
set_property(TARGET ${custom_target_name} | |||
PROPERTY GENERATED_INCLUDES_DIR | |||
${generated_includes_dir}) | |||
set_property(TARGET ${custom_target_name} PROPERTY GENERATED_INCLUDES_DIR | |||
${generated_includes_dir}) | |||
endif() | |||
# Register the binary schemas dir we are using. | |||
if (NOT ${binary_schemas_dir} STREQUAL "") | |||
set_property(TARGET ${custom_target_name} | |||
PROPERTY BINARY_SCHEMAS_DIR | |||
${binary_schemas_dir}) | |||
if(NOT ${binary_schemas_dir} STREQUAL "") | |||
set_property(TARGET ${custom_target_name} PROPERTY BINARY_SCHEMAS_DIR | |||
${binary_schemas_dir}) | |||
endif() | |||
# Register the text schema copy dir we are using. | |||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||
set_property(TARGET ${custom_target_name} | |||
PROPERTY COPY_TEXT_SCHEMAS_DIR | |||
${copy_text_schemas_dir}) | |||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||
set_property(TARGET ${custom_target_name} PROPERTY COPY_TEXT_SCHEMAS_DIR | |||
${copy_text_schemas_dir}) | |||
endif() | |||
endfunction() |
@@ -1,49 +1,45 @@ | |||
# Parses the version set in src/core/include/megbrain/version.h | |||
# Exports the following variables: | |||
# MGB_VER_MAJOR: Major version | |||
# MGB_VER_MINOR: Minor version | |||
# MGB_VER_PATCH: Patch version | |||
# MGB_IS_DEV: Is development version | |||
# MGB_VER_STRING: Version string | |||
# Parses the version set in src/core/include/megbrain/version.h and exports the | |||
# following variables: | |||
# | |||
# * MGB_VER_MAJOR: Major version | |||
# * MGB_VER_MINOR: Minor version | |||
# * MGB_VER_PATCH: Patch version | |||
# * MGB_IS_DEV: Is development version | |||
# * MGB_VER_STRING: Version string | |||
option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF) | |||
file (READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||
string (REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||
set (MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||
set(MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||
set (MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||
set(MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||
set (MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||
set(MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||
set (MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||
set(MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||
set (MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||
set(MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||
set (MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||
set(MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||
string (REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||
set (MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||
set(MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||
if (MGB_FORCE_DEV_VERSION) | |||
set (MGB_IS_DEV 1) | |||
if(MGB_FORCE_DEV_VERSION) | |||
set(MGB_IS_DEV 1) | |||
else() | |||
string (REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||
set (MGB_IS_DEV ${CMAKE_MATCH_1}) | |||
string(REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||
set(MGB_IS_DEV ${CMAKE_MATCH_1}) | |||
endif() | |||
if (DEFINED MGB_VER_MAJOR) | |||
set (MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||
if(DEFINED MGB_VER_MAJOR) | |||
set(MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||
else() | |||
set (MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||
set(MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||
endif(DEFINED MGB_VER_MAJOR) | |||
if (MGB_IS_DEV) | |||
set (MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||
if(MGB_IS_DEV) | |||
set(MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||
endif() | |||
message(STATUS "Building MegBrain ${MGB_VER_STRING}") |
@@ -2,31 +2,40 @@ | |||
include(ExternalProject) | |||
find_package(LLVM 6.0 REQUIRED CONFIG) | |||
STRING(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||
string(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||
list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR) | |||
list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR) | |||
set(HALIDE_DIR "${PROJECT_SOURCE_DIR}/third_party/Halide" CACHE STRING "halide directory") | |||
set(HALIDE_DIR | |||
"${PROJECT_SOURCE_DIR}/third_party/Halide" | |||
CACHE STRING "halide directory") | |||
set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide) | |||
set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a) | |||
ExternalProject_add( | |||
halide | |||
SOURCE_DIR ${HALIDE_DIR} | |||
PREFIX ${HALIDE_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} -DWITH_APPS=OFF -DWITH_TESTS=OFF -DWITH_TUTORIALS=OFF -DHALIDE_SHARED_LIBRARY=OFF -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DTARGET_MIPS=OFF -DTARGET_POWERPC=OFF | |||
BUILD_BYPRODUCTS ${HALIDE_LIB} | |||
) | |||
# Build the Halide sources in HALIDE_DIR as an external project, using HALIDE_BUILD_DIR | |||
# as both the project prefix and the install prefix. The compiler launchers, build type | |||
# and toolchain file of this build are forwarded to it. Apps, tests, tutorials, the | |||
# shared library and the MIPS/PowerPC targets are switched off, and position-independent | |||
# code is switched on. HALIDE_REQUIRE_LLVM_VERSION receives the LLVM major and minor | |||
# version numbers concatenated without a separator. | |||
# BUILD_BYPRODUCTS names the static library so that generators such as Ninja know this | |||
# step produces it. | |||
ExternalProject_Add( | |||
halide | |||
SOURCE_DIR ${HALIDE_DIR} | |||
PREFIX ${HALIDE_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||
-DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} | |||
-DWITH_APPS=OFF | |||
-DWITH_TESTS=OFF | |||
-DWITH_TUTORIALS=OFF | |||
-DHALIDE_SHARED_LIBRARY=OFF | |||
-DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} | |||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||
-DTARGET_MIPS=OFF | |||
-DTARGET_POWERPC=OFF | |||
BUILD_BYPRODUCTS ${HALIDE_LIB}) | |||
set(HALIDE_INC ${HALIDE_BUILD_DIR}/include) | |||
file(MAKE_DIRECTORY ${HALIDE_INC}) | |||
add_library(libhalide STATIC IMPORTED GLOBAL) | |||
add_dependencies(libhalide halide) | |||
set_target_properties( | |||
libhalide PROPERTIES | |||
IMPORTED_LOCATION ${HALIDE_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC} | |||
) | |||
set_target_properties(libhalide PROPERTIES IMPORTED_LOCATION ${HALIDE_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC}) | |||
set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU) | |||
llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS}) | |||
@@ -1,25 +1,31 @@ | |||
if (MGE_USE_SYSTEM_LIB) | |||
find_package(dnnl) | |||
if (dnnl_FOUND) | |||
message(STATUS "Using system provided MKL-DNN.") | |||
set (MGE_USE_SYSTEM_MKLDNN ON) | |||
return() | |||
endif() | |||
# When MGE_USE_SYSTEM_LIB is set, look for an installed dnnl package. If one is found, | |||
# record that in MGE_USE_SYSTEM_MKLDNN and stop processing this file via return(), so | |||
# the remaining commands of this file are not executed. If none is found, fall through | |||
# to them. | |||
if(MGE_USE_SYSTEM_LIB) | |||
find_package(dnnl) | |||
if(dnnl_FOUND) | |||
message(STATUS "Using system provided MKL-DNN.") | |||
set(MGE_USE_SYSTEM_MKLDNN ON) | |||
return() | |||
endif() | |||
endif() | |||
option(DNNL_BUILD_TESTS "" OFF) | |||
option(DNNL_BUILD_EXAMPLES "" OFF) | |||
# we do not want to use OMP now, so config to CPU mode | |||
# if set to OMP, some dnnl algo will be more fast | |||
set(DNNL_CPU_RUNTIME "SEQ" CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||
# We do not want to use OpenMP for now, so configure DNNL's CPU runtime as sequential | |||
# (SEQ). If it were set to OMP, some DNNL algorithms would be faster. | |||
set(DNNL_CPU_RUNTIME | |||
"SEQ" | |||
CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||
if(MGE_BLAS STREQUAL "MKL") | |||
option(_DNNL_USE_MKL "" ON) | |||
set(MKLROOT ${MKL_ROOT_DIR} CACHE STRING "MKL ROOT FOR DNNL") | |||
set(MKLLIB libmkl) | |||
option(_DNNL_USE_MKL "" ON) | |||
set(MKLROOT | |||
${MKL_ROOT_DIR} | |||
CACHE STRING "MKL ROOT FOR DNNL") | |||
set(MKLLIB libmkl) | |||
else() | |||
option(_DNNL_USE_MKL "" OFF) | |||
option(_DNNL_USE_MKL "" OFF) | |||
endif() | |||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra") | |||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra") | |||
set(DNNL_LIBRARY_TYPE STATIC CACHE STRING "config dnnl to STATIC") | |||
set(DNNL_LIBRARY_TYPE | |||
STATIC | |||
CACHE STRING "config dnnl to STATIC") | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn) |
@@ -1,30 +1,28 @@ | |||
# - Find the NumPy libraries | |||
# This module finds if NumPy is installed, and sets the following variables | |||
# indicating where it is. | |||
# Find the NumPy libraries. | |||
# | |||
# This module checks whether NumPy is installed and sets the following variables | |||
# indicating where it is. | |||
# | |||
# TODO: Update to provide the libraries and paths for linking npymath lib. | |||
# | |||
# NUMPY_FOUND - was NumPy found | |||
# NUMPY_VERSION - the version of NumPy found as a string | |||
# NUMPY_VERSION_MAJOR - the major version number of NumPy | |||
# NUMPY_VERSION_MINOR - the minor version number of NumPy | |||
# NUMPY_VERSION_PATCH - the patch version number of NumPy | |||
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 | |||
# NUMPY_INCLUDE_DIR - path to the NumPy include files | |||
# * NUMPY_FOUND - was NumPy found | |||
# * NUMPY_VERSION - the version of NumPy found as a string | |||
# * NUMPY_VERSION_MAJOR - the major version number of NumPy | |||
# * NUMPY_VERSION_MINOR - the minor version number of NumPy | |||
# * NUMPY_VERSION_PATCH - the patch version number of NumPy | |||
# * NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 | |||
# * NUMPY_INCLUDE_DIR - path to the NumPy include files | |||
unset(NUMPY_VERSION) | |||
unset(NUMPY_INCLUDE_DIR) | |||
if(PYTHONINTERP_FOUND) | |||
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||
"import numpy as n; print(n.__version__); print(n.get_include());" | |||
execute_process( | |||
COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||
"import numpy as n; print(n.__version__); print(n.get_include());" | |||
RESULT_VARIABLE __result | |||
OUTPUT_VARIABLE __output | |||
OUTPUT_STRIP_TRAILING_WHITESPACE) | |||
if(__result MATCHES 0) | |||
string(REGEX REPLACE ";" "\\\\;" __values ${__output}) | |||
string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||
string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||
list(GET __values 0 NUMPY_VERSION) | |||
list(GET __values 1 NUMPY_INCLUDE_DIR) | |||
@@ -33,13 +31,18 @@ if(PYTHONINTERP_FOUND) | |||
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) | |||
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) | |||
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) | |||
math(EXPR NUMPY_VERSION_DECIMAL | |||
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") | |||
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||
math( | |||
EXPR | |||
NUMPY_VERSION_DECIMAL | |||
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}" | |||
) | |||
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||
else() | |||
unset(NUMPY_VERSION) | |||
unset(NUMPY_INCLUDE_DIR) | |||
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n") | |||
unset(NUMPY_VERSION) | |||
unset(NUMPY_INCLUDE_DIR) | |||
message( | |||
STATUS | |||
"Requested NumPy version and include path, but got instead:\n${__output}\n") | |||
endif() | |||
endif() | |||
else() | |||
@@ -47,8 +50,10 @@ else() | |||
endif() | |||
include(FindPackageHandleStandardArgs) | |||
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||
VERSION_VAR NUMPY_VERSION) | |||
find_package_handle_standard_args( | |||
NumPy | |||
REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||
VERSION_VAR NUMPY_VERSION) | |||
if(NUMPY_FOUND) | |||
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") | |||
@@ -1,48 +1,50 @@ | |||
if (MGE_USE_SYSTEM_LIB) | |||
find_package(OpenBLAS) | |||
set (MGE_USE_SYSTEM_OPENBLAS ON) | |||
message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||
add_library(libopenblas IMPORTED GLOBAL) | |||
set_target_properties( | |||
libopenblas PROPERTIES | |||
IMPORTED_LOCATION ${OpenBLAS_LIBRARIES} | |||
INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS} | |||
) | |||
return() | |||
# Use the system-provided OpenBLAS when MGE_USE_SYSTEM_LIB is set: expose it as the | |||
# imported target libopenblas and stop processing this file via return(), so the | |||
# remaining commands of this file are not executed. | |||
if(MGE_USE_SYSTEM_LIB) | |||
  # REQUIRED: fail here with a clear message rather than creating a target from empty | |||
  # paths. | |||
  find_package(OpenBLAS REQUIRED) | |||
  set(MGE_USE_SYSTEM_OPENBLAS ON) | |||
  message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||
  # An IMPORTED library must declare its type; UNKNOWN accepts either a static or a | |||
  # shared library file. | |||
  add_library(libopenblas UNKNOWN IMPORTED GLOBAL) | |||
  # The values are quoted so that a multi-entry list stays one property value instead | |||
  # of shifting the property/value pairs. | |||
  set_target_properties( | |||
    libopenblas PROPERTIES IMPORTED_LOCATION "${OpenBLAS_LIBRARIES}" | |||
                           INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}") | |||
  return() | |||
endif() | |||
include(ExternalProject) | |||
include(GNUInstallDirs) | |||
set(OPENBLAS_DIR "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" CACHE STRING "OpenBLAS directory") | |||
set(OPENBLAS_DIR | |||
"${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" | |||
CACHE STRING "OpenBLAS directory") | |||
set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS) | |||
set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include) | |||
set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a) | |||
if(${CMAKE_GENERATOR} STREQUAL "Ninja") | |||
set(MAKE_COMMAND make) | |||
set(MAKE_COMMAND make) | |||
else() | |||
set(MAKE_COMMAND "$(MAKE)") | |||
set(MAKE_COMMAND "$(MAKE)") | |||
endif() | |||
ExternalProject_add( | |||
openblas | |||
SOURCE_DIR ${OPENBLAS_DIR} | |||
PREFIX ${OPENBLAS_BUILD_DIR} | |||
CMAKE_GENERATOR "Unix Makefiles" | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||
BUILD_COMMAND ${MAKE_COMMAND} | |||
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE} | |||
) | |||
# Build the OpenBLAS sources in OPENBLAS_DIR as an external project, using | |||
# OPENBLAS_BUILD_DIR as both the project prefix and the install prefix. The sub-build | |||
# always uses the "Unix Makefiles" generator and is driven by MAKE_COMMAND. The compiler | |||
# launchers, build type and toolchain file of this build are forwarded to it, and | |||
# position-independent code is switched on. | |||
# NOTE(review): OPENBLAS_PROTOC_EXECUTABLE is not set anywhere in the visible part of | |||
# this file; if it is undefined it expands to nothing here. Confirm whether it is a | |||
# leftover that can be removed. | |||
ExternalProject_Add( | |||
openblas | |||
SOURCE_DIR ${OPENBLAS_DIR} | |||
PREFIX ${OPENBLAS_BUILD_DIR} | |||
CMAKE_GENERATOR "Unix Makefiles" | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||
-DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} | |||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||
BUILD_COMMAND ${MAKE_COMMAND} | |||
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE}) | |||
file(MAKE_DIRECTORY ${OPENBLAS_INC}) | |||
add_library(libopenblas STATIC IMPORTED GLOBAL) | |||
add_dependencies(libopenblas openblas) | |||
set_target_properties( | |||
libopenblas PROPERTIES | |||
IMPORTED_LOCATION ${OPENBLAS_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include | |||
) | |||
libopenblas PROPERTIES IMPORTED_LOCATION ${OPENBLAS_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include) |
@@ -1,31 +1,31 @@ | |||
find_library(ACLRT_LIBRARY | |||
NAMES libascendcl.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES stub | |||
DOC "ACL library." ) | |||
# Locate libascendcl.so and store its path in ACLRT_LIBRARY. ALTER_LIBRARY_PATHS is | |||
# passed as hints; ALTER_LD_LIBRARY_PATHS, $ENV{ACLRT_HOME}/lib64/stub and | |||
# CMAKE_INSTALL_PREFIX are passed as additional search paths, with "stub" as an extra | |||
# sub-directory suffix. | |||
find_library( | |||
ACLRT_LIBRARY | |||
NAMES libascendcl.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES stub | |||
DOC "ACL library.") | |||
if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find ACLRT Library") | |||
message(FATAL_ERROR "Can not find ACLRT Library") | |||
endif() | |||
get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH) | |||
find_path(ACLRT_INCLUDE_DIR | |||
NAMES acl/acl.h | |||
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to ACLRT include directory." ) | |||
find_path( | |||
ACLRT_INCLUDE_DIR | |||
NAMES acl/acl.h | |||
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to ACLRT include directory.") | |||
if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find ACLRT Library") | |||
message(FATAL_ERROR "Can not find ACLRT Library") | |||
endif() | |||
add_library(libascendcl SHARED IMPORTED) | |||
set_target_properties(libascendcl PROPERTIES | |||
IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR} | |||
) | |||
set_target_properties( | |||
libascendcl PROPERTIES IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR}) | |||
message(STATUS "Found ACLRT: ${__found_aclrt_root}") | |||
@@ -1,44 +1,57 @@ | |||
find_library(CNDEV_LIBRARY | |||
NAMES libcndev.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNDEV library." ) | |||
find_library( | |||
CNDEV_LIBRARY | |||
NAMES libcndev.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNDEV library.") | |||
if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNDEV Library") | |||
message(FATAL_ERROR "Can not find CNDEV Library") | |||
endif() | |||
get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH) | |||
find_path(CNDEV_INCLUDE_DIR | |||
NAMES cndev.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNDEV include directory." ) | |||
find_path( | |||
CNDEV_INCLUDE_DIR | |||
NAMES cndev.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNDEV include directory.") | |||
if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNDEV Library") | |||
message(FATAL_ERROR "Can not find CNDEV Library") | |||
endif() | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 "${CNDEV_1}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 "${CNDEV_2}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 "${CNDEV_3}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 "${CNDEV_4}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 "${CNDEV_5}") | |||
set(CNDEV_VERSION_STRING "${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}") | |||
# Read the five "#define CNDEV_VERSION_<n> <number>" lines from cndev.h into CNDEV_1 | |||
# through CNDEV_5. | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 | |||
REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 | |||
REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 | |||
REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 | |||
REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 | |||
REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||
# Reduce each matched line to its numeric value (the first capture group). | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 | |||
"${CNDEV_1}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 | |||
"${CNDEV_2}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 | |||
"${CNDEV_3}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 | |||
"${CNDEV_4}") | |||
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 | |||
"${CNDEV_5}") | |||
# Join the five components with dots into the version string. | |||
set(CNDEV_VERSION_STRING | |||
"${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}" | |||
) | |||
add_library(libcndev SHARED IMPORTED) | |||
set_target_properties(libcndev PROPERTIES | |||
IMPORTED_LOCATION ${CNDEV_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNDEV_INCLUDE_DIR} | |||
) | |||
message(STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") | |||
set_target_properties( | |||
libcndev PROPERTIES IMPORTED_LOCATION ${CNDEV_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${CNDEV_INCLUDE_DIR}) | |||
message( | |||
STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") |
@@ -1,40 +1,49 @@ | |||
find_library(CNLIGHT_LIBRARY | |||
NAMES libcnlight.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNLIGHT library." ) | |||
find_library( | |||
CNLIGHT_LIBRARY | |||
NAMES libcnlight.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNLIGHT library.") | |||
if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||
endif() | |||
get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH) | |||
find_path(CNLIGHT_INCLUDE_DIR | |||
NAMES cnlight.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNLIGHT include directory." ) | |||
find_path( | |||
CNLIGHT_INCLUDE_DIR | |||
NAMES cnlight.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNLIGHT include directory.") | |||
if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||
endif() | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||
set(CNLIGHT_VERSION_STRING "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR | |||
REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR | |||
REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH | |||
REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||
CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" | |||
CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" | |||
CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||
set(CNLIGHT_VERSION_STRING | |||
"${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||
add_library(libcnlight SHARED IMPORTED) | |||
set_target_properties(libcnlight PROPERTIES | |||
IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR} | |||
) | |||
message(STATUS "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") | |||
set_target_properties( | |||
libcnlight PROPERTIES IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR}) | |||
message( | |||
STATUS | |||
"Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") |
@@ -1,40 +1,48 @@ | |||
find_library(CNML_LIBRARY | |||
NAMES libcnml.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNML library." ) | |||
find_library( | |||
CNML_LIBRARY | |||
NAMES libcnml.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNML library.") | |||
if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNML Library") | |||
message(FATAL_ERROR "Can not find CNML Library") | |||
endif() | |||
get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH) | |||
find_path(CNML_INCLUDE_DIR | |||
NAMES cnml.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNML include directory." ) | |||
find_path( | |||
CNML_INCLUDE_DIR | |||
NAMES cnml.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNML include directory.") | |||
if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNML Library") | |||
message(FATAL_ERROR "Can not find CNML Library") | |||
endif() | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR "${CNML_MAJOR}") | |||
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR "${CNML_MINOR}") | |||
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH "${CNML_PATCH}") | |||
set(CNML_VERSION_STRING "${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR | |||
REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR | |||
REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH | |||
REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR | |||
"${CNML_MAJOR}") | |||
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR | |||
"${CNML_MINOR}") | |||
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH | |||
"${CNML_PATCH}") | |||
set(CNML_VERSION_STRING | |||
"${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||
add_library(libcnml SHARED IMPORTED) | |||
set_target_properties(libcnml PROPERTIES | |||
IMPORTED_LOCATION ${CNML_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNML_INCLUDE_DIR} | |||
) | |||
message(STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") | |||
set_target_properties( | |||
libcnml PROPERTIES IMPORTED_LOCATION ${CNML_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${CNML_INCLUDE_DIR}) | |||
message( | |||
STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") |
@@ -1,80 +1,100 @@ | |||
find_library(CNNL_LIBRARY | |||
NAMES libcnnl.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNNL library." ) | |||
find_library( | |||
CNNL_LIBRARY | |||
NAMES libcnnl.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNNL library.") | |||
if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNNL Library") | |||
message(FATAL_ERROR "Can not find CNNL Library") | |||
endif() | |||
get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH) | |||
find_path(CNNL_INCLUDE_DIR | |||
NAMES cnnl.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNNL include directory." ) | |||
find_path( | |||
CNNL_INCLUDE_DIR | |||
NAMES cnnl.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNNL include directory.") | |||
if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNNL Library") | |||
message(FATAL_ERROR "Can not find CNNL Library") | |||
endif() | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR "${CNNL_MAJOR}") | |||
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR "${CNNL_MINOR}") | |||
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH "${CNNL_PATCH}") | |||
set(CNNL_VERSION_STRING "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR | |||
REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR | |||
REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH | |||
REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR | |||
"${CNNL_MAJOR}") | |||
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR | |||
"${CNNL_MINOR}") | |||
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH | |||
"${CNNL_PATCH}") | |||
set(CNNL_VERSION_STRING | |||
"${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||
add_library(libcnnl SHARED IMPORTED) | |||
set_target_properties(libcnnl PROPERTIES | |||
IMPORTED_LOCATION ${CNNL_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_INCLUDE_DIR} | |||
) | |||
set_target_properties( | |||
libcnnl PROPERTIES IMPORTED_LOCATION ${CNNL_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${CNNL_INCLUDE_DIR}) | |||
message(STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||
message( | |||
STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||
find_library(CNNL_EXTRA_LIBRARY | |||
NAMES libcnnl_extra.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNNL_EXTRA library." ) | |||
find_library( | |||
CNNL_EXTRA_LIBRARY | |||
NAMES libcnnl_extra.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNNL_EXTRA library.") | |||
if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||
endif() | |||
get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH) | |||
find_path(CNNL_EXTRA_INCLUDE_DIR | |||
NAMES cnnl_extra.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNNL_EXTRA include directory." ) | |||
find_path( | |||
CNNL_EXTRA_INCLUDE_DIR | |||
NAMES cnnl_extra.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNNL_EXTRA include directory.") | |||
if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||
endif() | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||
set(CNNL_EXTRA_VERSION_STRING "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}") | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR | |||
REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR | |||
REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH | |||
REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" | |||
CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" | |||
CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" | |||
CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||
set(CNNL_EXTRA_VERSION_STRING | |||
"${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}" | |||
) | |||
add_library(libcnnl_extra SHARED IMPORTED) | |||
set_target_properties(libcnnl_extra PROPERTIES | |||
IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR} | |||
) | |||
message(STATUS "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})") | |||
set_target_properties( | |||
libcnnl_extra PROPERTIES IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR}) | |||
message( | |||
STATUS | |||
"Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})" | |||
) |
@@ -1,40 +1,48 @@ | |||
find_library(CNRT_LIBRARY | |||
NAMES libcnrt.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNRT library." ) | |||
find_library( | |||
CNRT_LIBRARY | |||
NAMES libcnrt.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CNRT library.") | |||
if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNRT Library") | |||
message(FATAL_ERROR "Can not find CNRT Library") | |||
endif() | |||
get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH) | |||
find_path(CNRT_INCLUDE_DIR | |||
NAMES cnrt.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNRT include directory." ) | |||
find_path( | |||
CNRT_INCLUDE_DIR | |||
NAMES cnrt.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CNRT include directory.") | |||
if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CNRT Library") | |||
message(FATAL_ERROR "Can not find CNRT Library") | |||
endif() | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR "${CNRT_MAJOR}") | |||
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR "${CNRT_MINOR}") | |||
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH "${CNRT_PATCH}") | |||
set(CNRT_VERSION_STRING "${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR | |||
REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR | |||
REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH | |||
REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR | |||
"${CNRT_MAJOR}") | |||
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR | |||
"${CNRT_MINOR}") | |||
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH | |||
"${CNRT_PATCH}") | |||
set(CNRT_VERSION_STRING | |||
"${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||
add_library(libcnrt SHARED IMPORTED) | |||
set_target_properties(libcnrt PROPERTIES | |||
IMPORTED_LOCATION ${CNRT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CNRT_INCLUDE_DIR} | |||
) | |||
message(STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") | |||
set_target_properties( | |||
libcnrt PROPERTIES IMPORTED_LOCATION ${CNRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${CNRT_INCLUDE_DIR}) | |||
message( | |||
STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") |
@@ -1,2 +1,5 @@ | |||
file(GLOB_RECURSE CPP_REDIS_SRCS ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp ${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes ${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) | |||
file(GLOB_RECURSE CPP_REDIS_SRCS | |||
${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp | |||
${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes | |||
${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) |
@@ -1,20 +1,20 @@ | |||
if (MGE_USE_SYSTEM_LIB) | |||
find_package(Cpuinfo) | |||
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||
add_library(libcpuinfo IMPORTED GLOBAL) | |||
set_target_properties( | |||
libcpuinfo PROPERTIES | |||
IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS} | |||
) | |||
return() | |||
if(MGE_USE_SYSTEM_LIB) | |||
find_package(Cpuinfo) | |||
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||
add_library(libcpuinfo IMPORTED GLOBAL) | |||
set_target_properties( | |||
libcpuinfo PROPERTIES IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS}) | |||
return() | |||
endif() | |||
SET(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||
OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||
OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||
OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||
OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||
set(CPUINFO_LIBRARY_TYPE | |||
"static" | |||
CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||
option(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||
option(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||
option(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||
option(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||
include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include") | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo | |||
${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) |
@@ -1,73 +1,83 @@ | |||
find_package(PkgConfig) | |||
if(${PkgConfig_FOUND}) | |||
pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||
pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||
endif() | |||
if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||
if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||
endif() | |||
if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED) | |||
find_library(CUDNN_LIBRARY | |||
NAMES libcudnn_static.a cudnn.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CUDNN library." ) | |||
find_library( | |||
CUDNN_LIBRARY | |||
NAMES libcudnn_static.a cudnn.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||
${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CUDNN library.") | |||
else() | |||
find_library(CUDNN_LIBRARY | |||
NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CUDNN library." ) | |||
find_library( | |||
CUDNN_LIBRARY | |||
NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||
${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "CUDNN library.") | |||
endif() | |||
if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||
) | |||
endif() | |||
get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH) | |||
find_path(CUDNN_INCLUDE_DIR | |||
NAMES cudnn.h | |||
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CUDNN include directory." ) | |||
find_path( | |||
CUDNN_INCLUDE_DIR | |||
NAMES cudnn.h | |||
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} | |||
${__found_cudnn_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to CUDNN include directory.") | |||
if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||
) | |||
endif() | |||
if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h) | |||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||
else() | |||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||
endif() | |||
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" | |||
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" | |||
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") | |||
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" | |||
CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" | |||
CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") | |||
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" | |||
CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" | |||
CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") | |||
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" CUDNN_MAJOR_VERSION | |||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" CUDNN_MAJOR_VERSION | |||
"${CUDNN_MAJOR_VERSION}") | |||
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" CUDNN_MINOR_VERSION | |||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" CUDNN_MINOR_VERSION | |||
"${CUDNN_MINOR_VERSION}") | |||
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" CUDNN_PATCH_VERSION | |||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" CUDNN_PATCH_VERSION | |||
"${CUDNN_PATCH_VERSION}") | |||
set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}) | |||
if(MGE_CUDA_USE_STATIC) | |||
add_library(libcudnn STATIC IMPORTED) | |||
add_library(libcudnn STATIC IMPORTED) | |||
else() | |||
add_library(libcudnn SHARED IMPORTED) | |||
add_library(libcudnn SHARED IMPORTED) | |||
endif() | |||
set_target_properties(libcudnn PROPERTIES | |||
IMPORTED_LOCATION ${CUDNN_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR}) | |||
set_target_properties( | |||
libcudnn PROPERTIES IMPORTED_LOCATION ${CUDNN_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${CUDNN_INCLUDE_DIR}) | |||
message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})") |
@@ -1,27 +1,47 @@ | |||
if (MGE_USE_SYSTEM_LIB) | |||
find_package(Flatbuffers REQUIRED) | |||
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||
include(cmake/BuildFlatBuffers.cmake) | |||
return() | |||
if(MGE_USE_SYSTEM_LIB) | |||
find_package(Flatbuffers REQUIRED) | |||
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||
include(cmake/BuildFlatBuffers.cmake) | |||
return() | |||
endif() | |||
if(MSVC OR WIN32) | |||
message(DEBUG "add flags flatc for clang-cl build") | |||
set(FLATC_FLAGS "") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal") | |||
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override") | |||
endif() | |||
message(DEBUG "add flags flatc for clang-cl build") | |||
set(FLATC_FLAGS "") | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors" | |||
) | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal" | |||
) | |||
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||
set(FLATC_FLAGS | |||
"${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override" | |||
) | |||
endif() | |||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||
endif() | |||
option(FLATBUFFERS_BUILD_TESTS "" OFF) | |||
@@ -1 +1,2 @@ | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags) | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags | |||
${CMAKE_CURRENT_BINARY_DIR}/gflags) |
@@ -1,2 +1,2 @@ | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) | |||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest | |||
${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) |
@@ -1,88 +1,136 @@ | |||
# - Find the llvm/mlir libraries | |||
# This module finds if llvm/mlir is installed, or build llvm/mlir from source. | |||
# This module sets the following variables. | |||
# * Find the llvm/mlir libraries This module finds if llvm/mlir is installed, or build | |||
# llvm/mlir from source. This module sets the following variables. | |||
# | |||
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files | |||
# MLIR_LLVM_LIBS - path to the LLVM/MLIR libraries | |||
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files MLIR_LLVM_LIBS - path | |||
# to the LLVM/MLIR libraries | |||
# | |||
# This module define the following functions. | |||
# | |||
# external_tablegen_library - created interface library which depends on tablegen outputs | |||
# external_tablegen_library - created interface library which depends on tablegen | |||
# outputs | |||
include(CMakeParseArguments) | |||
function(external_tablegen_library) | |||
cmake_parse_arguments( | |||
_RULE | |||
"TESTONLY" | |||
"NAME;TBLGEN" | |||
"SRCS;INCLUDES;OUTS" | |||
${ARGN} | |||
) | |||
cmake_parse_arguments(_RULE "TESTONLY" "NAME;TBLGEN" "SRCS;INCLUDES;OUTS" ${ARGN}) | |||
if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||
return() | |||
endif() | |||
if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||
return() | |||
endif() | |||
set(_NAME ${_RULE_NAME}) | |||
set(_NAME ${_RULE_NAME}) | |||
set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||
set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||
set(_OUTPUTS) | |||
while(_RULE_OUTS) | |||
list(GET _RULE_OUTS 0 _COMMAND) | |||
list(REMOVE_AT _RULE_OUTS 0) | |||
list(GET _RULE_OUTS 0 _FILE) | |||
list(REMOVE_AT _RULE_OUTS 0) | |||
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||
endwhile() | |||
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||
set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||
set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||
set(_OUTPUTS) | |||
while(_RULE_OUTS) | |||
list(GET _RULE_OUTS 0 _COMMAND) | |||
list(REMOVE_AT _RULE_OUTS 0) | |||
list(GET _RULE_OUTS 0 _FILE) | |||
list(REMOVE_AT _RULE_OUTS 0) | |||
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||
endwhile() | |||
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||
add_library(${_NAME} INTERFACE) | |||
add_dependencies(${_NAME} ${_NAME}_target) | |||
add_library(${_NAME} INTERFACE) | |||
add_dependencies(${_NAME} ${_NAME}_target) | |||
target_include_directories(${_NAME} INTERFACE | |||
"$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||
target_include_directories(${_NAME} INTERFACE "$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||
install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||
install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||
endfunction() | |||
set(LLVM_LIBS LLVMCore LLVMSupport LLVMX86CodeGen LLVMOrcJIT LLVMNVPTXCodeGen LLVMNVPTXDesc LLVMNVPTXInfo) | |||
set(MLIR_CORE_LIBS MLIRAnalysis MLIRExecutionEngine MLIRIR MLIRParser MLIRPass MLIRSideEffectInterfaces MLIRTransforms) | |||
set(MLIR_DIALECT_LIBS MLIRAsync MLIRAVX512 MLIRGPU MLIRLLVMAVX512 MLIRNVVMIR MLIROpenACC MLIRPDL MLIRPDLInterp MLIRQuant MLIRROCDLIR MLIRSDBM MLIRShape MLIRSPIRV MLIRStandardOpsTransforms MLIRTosa) | |||
set(MLIR_CONVERSION_LIBS MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||
set(LLVM_LIBS | |||
LLVMCore | |||
LLVMSupport | |||
LLVMX86CodeGen | |||
LLVMOrcJIT | |||
LLVMNVPTXCodeGen | |||
LLVMNVPTXDesc | |||
LLVMNVPTXInfo) | |||
set(MLIR_CORE_LIBS | |||
MLIRAnalysis | |||
MLIRExecutionEngine | |||
MLIRIR | |||
MLIRParser | |||
MLIRPass | |||
MLIRSideEffectInterfaces | |||
MLIRTransforms) | |||
set(MLIR_DIALECT_LIBS | |||
MLIRAsync | |||
MLIRAVX512 | |||
MLIRGPU | |||
MLIRLLVMAVX512 | |||
MLIRNVVMIR | |||
MLIROpenACC | |||
MLIRPDL | |||
MLIRPDLInterp | |||
MLIRQuant | |||
MLIRROCDLIR | |||
MLIRSDBM | |||
MLIRShape | |||
MLIRSPIRV | |||
MLIRStandardOpsTransforms | |||
MLIRTosa) | |||
set(MLIR_CONVERSION_LIBS | |||
MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms | |||
MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||
set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR) | |||
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} ${MLIR_TRANSLATION_LIBS}) | |||
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} | |||
${MLIR_TRANSLATION_LIBS}) | |||
set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS}) | |||
function(add_mge_mlir_src_dep llvm_monorepo_path) | |||
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||
set(CMAKE_BUILD_TYPE "Debug") | |||
endif() | |||
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) | |||
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES | |||
"^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||
set(CMAKE_BUILD_TYPE "Debug") | |||
endif() | |||
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||
set(BUILD_SHARED_LIBS | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||
add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||
# Reset CMAKE_BUILD_TYPE to its previous setting | |||
set(CMAKE_BUILD_TYPE "${_CMAKE_BUILD_TYPE}" CACHE STRING "Build type" FORCE) | |||
# Reset BUILD_SHARED_LIBS to its previous setting | |||
set(BUILD_SHARED_LIBS ${_CMAKE_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libraries" FORCE) | |||
# Reset CMAKE_BUILD_TYPE to its previous setting | |||
set(CMAKE_BUILD_TYPE | |||
"${_CMAKE_BUILD_TYPE}" | |||
CACHE STRING "Build type" FORCE) | |||
# Reset BUILD_SHARED_LIBS to its previous setting | |||
set(BUILD_SHARED_LIBS | |||
${_CMAKE_BUILD_SHARED_LIBS} | |||
CACHE BOOL "Build shared libraries" FORCE) | |||
endfunction() | |||
# llvm build options | |||
set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "" FORCE) | |||
set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "" FORCE) | |||
set(LLVM_ENABLE_RTTI ${MGE_ENABLE_RTTI} CACHE BOOL "" FORCE) | |||
set(LLVM_TARGETS_TO_BUILD "X86;NVPTX;AArch64;ARM" CACHE STRING "" FORCE) | |||
set(LLVM_ENABLE_PROJECTS "mlir" CACHE STRING "" FORCE) | |||
set(LLVM_INCLUDE_EXAMPLES | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_TESTS | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_DOCS | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_ENABLE_BINDINGS | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_INCLUDE_BENCHMARKS | |||
OFF | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_ENABLE_RTTI | |||
${MGE_ENABLE_RTTI} | |||
CACHE BOOL "" FORCE) | |||
set(LLVM_TARGETS_TO_BUILD | |||
"X86;NVPTX;AArch64;ARM" | |||
CACHE STRING "" FORCE) | |||
set(LLVM_ENABLE_PROJECTS | |||
"mlir" | |||
CACHE STRING "" FORCE) | |||
set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm) | |||
add_mge_mlir_src_dep("third_party/llvm-project") | |||
@@ -91,6 +139,5 @@ set(MLIR_LLVM_INCLUDE_DIR | |||
${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include | |||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include | |||
${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include | |||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include | |||
) | |||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include) | |||
set(MLIR_TABLEGEN_EXE mlir-tblgen) |
@@ -1,54 +1,64 @@ | |||
find_library(MAGICMIND_LIBRARY | |||
NAMES libmagicmind.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "MAGICMIND library." ) | |||
find_library( | |||
MAGICMIND_LIBRARY | |||
NAMES libmagicmind.so | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "MAGICMIND library.") | |||
if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||
endif() | |||
get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH) | |||
find_path(MAGICMIND_INCLUDE_DIR | |||
NAMES common.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to MAGICMIND include directory." ) | |||
find_path( | |||
MAGICMIND_INCLUDE_DIR | |||
NAMES common.h | |||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to MAGICMIND include directory.") | |||
if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||
endif() | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR | |||
REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR | |||
REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH | |||
REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||
string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||
set(MAGICMIND_VERSION_STRING "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||
string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||
MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" | |||
MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" | |||
MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||
set(MAGICMIND_VERSION_STRING | |||
"${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||
add_library(libmagicmind SHARED IMPORTED) | |||
set_target_properties(libmagicmind PROPERTIES | |||
IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR} | |||
) | |||
set_target_properties( | |||
libmagicmind PROPERTIES IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR}) | |||
message(STATUS "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})") | |||
message( | |||
STATUS | |||
"Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})" | |||
) | |||
find_library(MAGICMIND_RUNTIME_LIBRARY | |||
NAMES libmagicmind_runtime.so | |||
PATHS "${__found_magicmind_root}/lib64" | |||
) | |||
find_library( | |||
MAGICMIND_RUNTIME_LIBRARY | |||
NAMES libmagicmind_runtime.so | |||
PATHS "${__found_magicmind_root}/lib64") | |||
if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||
else() | |||
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||
endif() | |||
add_library(libmagicmind_runtime SHARED IMPORTED) | |||
set_target_properties(libmagicmind_runtime PROPERTIES | |||
IMPORTED_LOCATION ${MAGICMIND_RUNTIME_LIBRARY} | |||
) | |||
set_target_properties(libmagicmind_runtime PROPERTIES IMPORTED_LOCATION | |||
${MAGICMIND_RUNTIME_LIBRARY}) |
@@ -1,77 +1,83 @@ | |||
find_path(MKL_ROOT_DIR | |||
include/mkl_cblas.h | |||
PATHS | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||
$ENV{MKLDIR} | |||
/opt/intel/mkl/*/ | |||
/opt/intel/cmkl/*/ | |||
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal | |||
) | |||
find_path( | |||
MKL_ROOT_DIR include/mkl_cblas.h | |||
PATHS ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||
$ENV{MKLDIR} | |||
/opt/intel/mkl/*/ | |||
/opt/intel/cmkl/*/ | |||
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal) | |||
if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find MKL") | |||
message(FATAL_ERROR "Can not find MKL") | |||
endif() | |||
message(STATUS "Build with MKL in ${MKL_ROOT_DIR}") | |||
find_path(MKL_INCLUDE_DIR | |||
mkl_cblas.h | |||
PATHS | |||
${MKL_ROOT_DIR}/include | |||
${INCLUDE_INSTALL_DIR} | |||
) | |||
find_path(MKL_INCLUDE_DIR mkl_cblas.h PATHS ${MKL_ROOT_DIR}/include | |||
${INCLUDE_INSTALL_DIR}) | |||
option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON) | |||
if(MGE_MKL_USE_STATIC) | |||
find_library(MKL_CORE_LIBRARY | |||
NAMES libmkl_core.a mkl_core.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library( | |||
MKL_CORE_LIBRARY | |||
NAMES libmkl_core.a mkl_core.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library(MKL_SEQUENTIAL_LIBRARY | |||
NAMES libmkl_sequential.a mkl_sequential.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library( | |||
MKL_SEQUENTIAL_LIBRARY | |||
NAMES libmkl_sequential.a mkl_sequential.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
if(${MGE_ARCH} STREQUAL "x86_64") | |||
find_library(MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
elseif(${MGE_ARCH} STREQUAL "i386") | |||
find_library(MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
endif() | |||
if(${MGE_ARCH} STREQUAL "x86_64") | |||
find_library( | |||
MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
elseif(${MGE_ARCH} STREQUAL "i386") | |||
find_library( | |||
MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
endif() | |||
add_library(libmkl INTERFACE IMPORTED) | |||
if(UNIX AND NOT APPLE) | |||
target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group) | |||
else() | |||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||
endif() | |||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||
add_library(libmkl INTERFACE IMPORTED) | |||
if(UNIX AND NOT APPLE) | |||
target_link_libraries( | |||
libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||
${MKL_IPL_LIBRARY} -Wl,--end-group) | |||
else() | |||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} | |||
${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||
endif() | |||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||
else() | |||
find_library(MKL_CORE_LIBRARY | |||
NAMES libmkl_core.so libmkl_core.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library( | |||
MKL_CORE_LIBRARY | |||
NAMES libmkl_core.so libmkl_core.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library(MKL_SEQUENTIAL_LIBRARY | |||
NAMES libmkl_sequential.so libmkl_sequential.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
find_library( | |||
MKL_SEQUENTIAL_LIBRARY | |||
NAMES libmkl_sequential.so libmkl_sequential.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
if(${MGE_ARCH} STREQUAL "x86_64") | |||
find_library(MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
elseif(${MGE_ARCH} STREQUAL "x86_32") | |||
find_library(MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
endif() | |||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||
if(${MGE_ARCH} STREQUAL "x86_64") | |||
find_library( | |||
MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
elseif(${MGE_ARCH} STREQUAL "x86_32") | |||
find_library( | |||
MKL_IPL_LIBRARY | |||
NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||
endif() | |||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||
${MKL_IPL_LIBRARY}) | |||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||
endif() | |||
if(${MGE_ARCH} STREQUAL "x86_64") | |||
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||
endif() |
@@ -1,70 +1,83 @@ | |||
function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR) | |||
if(NOT ARGN) | |||
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||
return() | |||
endif() | |||
if(NOT ARGN) | |||
message( | |||
SEND_ERROR | |||
"Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||
return() | |||
endif() | |||
set(${SRCS}) | |||
set(${HDRS}) | |||
foreach(FIL ${ARGN}) | |||
set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||
get_filename_component(FIL_WE ${FIL} NAME_WE) | |||
get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||
set(${SRCS}) | |||
set(${HDRS}) | |||
foreach(FIL ${ARGN}) | |||
set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||
get_filename_component(FIL_WE ${FIL} NAME_WE) | |||
get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||
add_custom_command( | |||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} | |||
ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||
DEPENDS ${ABS_FIL} libprotobuf | |||
COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||
VERBATIM) | |||
endforeach() | |||
add_custom_command( | |||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} | |||
-I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||
DEPENDS ${ABS_FIL} libprotobuf | |||
COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||
VERBATIM) | |||
endforeach() | |||
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||
set(${SRCS} ${${SRCS}} PARENT_SCOPE) | |||
set(${HDRS} ${${HDRS}} PARENT_SCOPE) | |||
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||
set(${SRCS} | |||
${${SRCS}} | |||
PARENT_SCOPE) | |||
set(${HDRS} | |||
${${HDRS}} | |||
PARENT_SCOPE) | |||
endfunction() | |||
if(MGE_USE_SYSTEM_LIB) | |||
find_package(Protobuf) | |||
if(Protobuf_FOUND) | |||
add_library(libprotobuf INTERFACE) | |||
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||
set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||
return() | |||
endif() | |||
find_package(Protobuf) | |||
if(Protobuf_FOUND) | |||
add_library(libprotobuf INTERFACE) | |||
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||
set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||
return() | |||
endif() | |||
endif() | |||
include(ExternalProject) | |||
include(GNUInstallDirs) | |||
set(PROTOBUF_DIR "${PROJECT_SOURCE_DIR}/third_party/protobuf" CACHE STRING "protobuf directory") | |||
set(PROTOBUF_DIR | |||
"${PROJECT_SOURCE_DIR}/third_party/protobuf" | |||
CACHE STRING "protobuf directory") | |||
set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf) | |||
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") | |||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||
else() | |||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||
endif() | |||
set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc) | |||
ExternalProject_add( | |||
protobuf | |||
SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||
PREFIX ${PROTOBUF_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} -Dprotobuf_BUILD_EXAMPLES=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE} | |||
) | |||
ExternalProject_Add( | |||
protobuf | |||
SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||
PREFIX ${PROTOBUF_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} | |||
-Dprotobuf_BUILD_EXAMPLES=OFF | |||
-Dprotobuf_BUILD_TESTS=OFF | |||
-DBUILD_SHARED_LIBS=OFF | |||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE}) | |||
set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include) | |||
file(MAKE_DIRECTORY ${PROTOBUF_INC}) | |||
@@ -72,19 +85,14 @@ file(MAKE_DIRECTORY ${PROTOBUF_INC}) | |||
add_library(libprotobuf STATIC IMPORTED GLOBAL) | |||
add_dependencies(libprotobuf protobuf) | |||
set_target_properties( | |||
libprotobuf PROPERTIES | |||
IMPORTED_LOCATION ${PROTOBUF_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include | |||
) | |||
libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include) | |||
add_executable(protoc IMPORTED GLOBAL) | |||
add_dependencies(protoc protobuf) | |||
set_target_properties( | |||
protoc PROPERTIES | |||
IMPORTED_LOCATION ${PROTOBUF_BUILD_DIR}/bin/protoc | |||
) | |||
set_target_properties(protoc PROPERTIES IMPORTED_LOCATION | |||
${PROTOBUF_BUILD_DIR}/bin/protoc) | |||
set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR}) | |||
set(PROTOBUF_PROTOC_EXECUTABLE protoc) | |||
set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include) | |||
@@ -1,28 +1,34 @@ | |||
if(NOT DEFINED HIP_PATH) | |||
if(NOT DEFINED ENV{HIP_PATH}) | |||
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") | |||
else() | |||
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") | |||
endif() | |||
if(NOT DEFINED ENV{HIP_PATH}) | |||
set(HIP_PATH | |||
"/opt/rocm/hip" | |||
CACHE PATH "Path to which HIP has been installed") | |||
else() | |||
set(HIP_PATH | |||
$ENV{HIP_PATH} | |||
CACHE PATH "Path to which HIP has been installed") | |||
endif() | |||
endif() | |||
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) | |||
find_package(HIP QUIET) | |||
if (HIP_FOUND) | |||
message(STATUS "Found HIP: " ${HIP_VERSION}) | |||
if(HIP_FOUND) | |||
message(STATUS "Found HIP: " ${HIP_VERSION}) | |||
else() | |||
message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.") | |||
message( | |||
FATAL_ERROR | |||
"Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location." | |||
) | |||
endif() | |||
if (${HIP_VERSION} VERSION_LESS 3.0) | |||
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||
if(${HIP_VERSION} VERSION_LESS 3.0) | |||
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||
endif() | |||
macro(hipconfig_get_option variable option) | |||
if(NOT DEFINED ${variable}) | |||
execute_process( | |||
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||
OUTPUT_VARIABLE ${variable}) | |||
endif() | |||
if(NOT DEFINED ${variable}) | |||
execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||
OUTPUT_VARIABLE ${variable}) | |||
endif() | |||
endmacro() | |||
hipconfig_get_option(HIP_COMPILER "--compiler") | |||
@@ -31,30 +37,33 @@ hipconfig_get_option(HIP_CPP_CONFIG "--cpp_config") | |||
separate_arguments(HIP_CPP_CONFIG) | |||
foreach(hip_config_item ${HIP_CPP_CONFIG}) | |||
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||
if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||
if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||
set(HIP_CPP_UNDEFINE | |||
"${HIP_CPP_UNDEFINE}\ | |||
#ifdef ${macro_name}\n#undef ${macro_name}\n\ | |||
#else\n#error\n\ | |||
#endif\n") | |||
elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||
elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||
set(HIP_CPP_UNDEFINE | |||
"${HIP_CPP_UNDEFINE}\ | |||
#ifdef ${macro_name}\n#undef ${macro_name}\n\ | |||
#else\n#error\n\ | |||
#endif\n") | |||
endif() | |||
endforeach() | |||
endif() | |||
endforeach() | |||
endforeach() | |||
message(STATUS "Using HIP compiler ${HIP_COMPILER}") | |||
if(${HIP_COMPILER} STREQUAL "hcc") | |||
set(MGE_ROCM_LIBS hip_hcc) | |||
message(WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||
elseif (${HIP_COMPILER} STREQUAL "clang") | |||
set(MGE_ROCM_LIBS amdhip64) | |||
set(MGE_ROCM_LIBS hip_hcc) | |||
message( | |||
WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||
elseif(${HIP_COMPILER} STREQUAL "clang") | |||
set(MGE_ROCM_LIBS amdhip64) | |||
endif() | |||
list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand) | |||
@@ -63,26 +72,28 @@ set(HIP_INCLUDE_DIR ${HIP_ROOT_DIR}/../include) | |||
set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib) | |||
function(find_rocm_library name dirname include library) | |||
find_path(${name}_LIBRARY_DIR | |||
NAMES ${library} | |||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||
PATH_SUFFIXES lib lib/x86_64 | |||
DOC "Path to ${name} library directory") | |||
find_path( | |||
${name}_LIBRARY_DIR | |||
NAMES ${library} | |||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||
PATH_SUFFIXES lib lib/x86_64 | |||
DOC "Path to ${name} library directory") | |||
if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||
message(FATAL_ERROR "Can not find ${name} library") | |||
endif() | |||
if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||
message(FATAL_ERROR "Can not find ${name} library") | |||
endif() | |||
find_path(${name}_INCLUDE_DIR | |||
NAMES ${include} | |||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||
PATH_SUFFIXES include | |||
DOC "Path to ${name} include directory") | |||
find_path( | |||
${name}_INCLUDE_DIR | |||
NAMES ${include} | |||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||
PATH_SUFFIXES include | |||
DOC "Path to ${name} include directory") | |||
if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||
message(FATAL_ERROR "Can not find ${name} include") | |||
endif() | |||
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||
if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||
message(FATAL_ERROR "Can not find ${name} include") | |||
endif() | |||
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||
endfunction() | |||
find_rocm_library(MIOPEN miopen miopen libMIOpen.so) | |||
@@ -1,166 +1,189 @@ | |||
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||
endif() | |||
if(MGE_CUDA_USE_STATIC) | |||
find_library(TRT_LIBRARY | |||
NAMES libnvinfer_static.a nvinfer.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT library." ) | |||
find_library(TRT_PLUGIN_LIBRARY | |||
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT plugin library." ) | |||
find_library( | |||
TRT_LIBRARY | |||
NAMES libnvinfer_static.a nvinfer.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT library.") | |||
find_library( | |||
TRT_PLUGIN_LIBRARY | |||
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT plugin library.") | |||
else() | |||
find_library(TRT_LIBRARY | |||
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT library." ) | |||
find_library(TRT_PLUGIN_LIBRARY | |||
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT plugin library." ) | |||
find_library( | |||
TRT_LIBRARY | |||
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT library.") | |||
find_library( | |||
TRT_PLUGIN_LIBRARY | |||
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||
HINTS ${ALTER_LIBRARY_PATHS} | |||
PATH_SUFFIXES lib lib64 | |||
DOC "TRT plugin library.") | |||
endif() | |||
if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||
) | |||
endif() | |||
if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||
) | |||
endif() | |||
get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH) | |||
find_path(TRT_INCLUDE_DIR | |||
NAMES NvInfer.h | |||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to TRT include directory." ) | |||
find_path(TRT_PLUGIN_INCLUDE_DIR | |||
NAMES NvInferPlugin.h | |||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to TRT plugin include directory." ) | |||
find_path( | |||
TRT_INCLUDE_DIR | |||
NAMES NvInfer.h | |||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to TRT include directory.") | |||
find_path( | |||
TRT_PLUGIN_INCLUDE_DIR | |||
NAMES NvInferPlugin.h | |||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||
PATH_SUFFIXES include | |||
DOC "Path to TRT plugin include directory.") | |||
if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||
) | |||
endif() | |||
if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||
message( | |||
FATAL_ERROR | |||
"Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||
) | |||
endif() | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR | |||
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR | |||
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH | |||
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||
if (TensorRT_MAJOR STREQUAL "") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||
if(TensorRT_MAJOR STREQUAL "") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR | |||
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR | |||
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH | |||
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||
endif() | |||
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||
set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" | |||
TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" | |||
TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" | |||
TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||
set(TRT_VERSION_STRING | |||
"${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||
if(MGE_CUDA_USE_STATIC) | |||
add_library(libnvinfer STATIC IMPORTED) | |||
add_library(libnvinfer_plugin STATIC IMPORTED) | |||
add_library(libnvinfer STATIC IMPORTED) | |||
add_library(libnvinfer_plugin STATIC IMPORTED) | |||
else() | |||
add_library(libnvinfer SHARED IMPORTED) | |||
add_library(libnvinfer_plugin SHARED IMPORTED) | |||
add_library(libnvinfer SHARED IMPORTED) | |||
add_library(libnvinfer_plugin SHARED IMPORTED) | |||
endif() | |||
set_target_properties(libnvinfer PROPERTIES | |||
IMPORTED_LOCATION ${TRT_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR} | |||
) | |||
set_target_properties(libnvinfer_plugin PROPERTIES | |||
IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR} | |||
) | |||
set_target_properties( | |||
libnvinfer PROPERTIES IMPORTED_LOCATION ${TRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||
${TRT_INCLUDE_DIR}) | |||
set_target_properties( | |||
libnvinfer_plugin PROPERTIES IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR}) | |||
message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||
message( | |||
STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | |||
if(MGE_CUDA_USE_STATIC) | |||
find_library(LIBMYELIN_COMPILER | |||
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||
PATHS ${__found_trt_root}/lib | |||
) | |||
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||
else() | |||
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||
endif() | |||
add_library(libmyelin_compiler STATIC IMPORTED) | |||
set_target_properties(libmyelin_compiler PROPERTIES | |||
IMPORTED_LOCATION ${LIBMYELIN_COMPILER} | |||
) | |||
if(MGE_CUDA_USE_STATIC) | |||
find_library( | |||
LIBMYELIN_COMPILER | |||
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||
PATHS ${__found_trt_root}/lib) | |||
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||
else() | |||
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||
endif() | |||
add_library(libmyelin_compiler STATIC IMPORTED) | |||
set_target_properties(libmyelin_compiler PROPERTIES IMPORTED_LOCATION | |||
${LIBMYELIN_COMPILER}) | |||
find_library(LIBMYELIN_EXECUTOR | |||
NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||
PATHS ${__found_trt_root}/lib | |||
) | |||
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||
endif() | |||
add_library(libmyelin_executor STATIC IMPORTED) | |||
set_target_properties(libmyelin_executor PROPERTIES | |||
IMPORTED_LOCATION ${LIBMYELIN_EXECUTOR} | |||
) | |||
find_library( | |||
LIBMYELIN_EXECUTOR | |||
NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||
PATHS ${__found_trt_root}/lib) | |||
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||
endif() | |||
add_library(libmyelin_executor STATIC IMPORTED) | |||
set_target_properties(libmyelin_executor PROPERTIES IMPORTED_LOCATION | |||
${LIBMYELIN_EXECUTOR}) | |||
find_library(LIBMYELIN_PATTERN_RUNTIME | |||
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||
PATHS ${__found_trt_root}/lib | |||
) | |||
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||
endif() | |||
add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||
set_target_properties(libmyelin_pattern_runtime PROPERTIES | |||
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME} | |||
) | |||
find_library( | |||
LIBMYELIN_PATTERN_RUNTIME | |||
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||
PATHS ${__found_trt_root}/lib) | |||
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||
else() | |||
message( | |||
STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||
endif() | |||
add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||
set_target_properties(libmyelin_pattern_runtime | |||
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME}) | |||
find_library(LIBMYELIN_PATTERN_LIBRARY | |||
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||
PATHS ${__found_trt_root}/lib | |||
) | |||
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||
endif() | |||
add_library(libmyelin_pattern_library STATIC IMPORTED) | |||
set_target_properties(libmyelin_pattern_library PROPERTIES | |||
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY} | |||
) | |||
find_library( | |||
LIBMYELIN_PATTERN_LIBRARY | |||
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||
PATHS ${__found_trt_root}/lib) | |||
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||
else() | |||
find_library(LIBMYELIN_SHARED | |||
NAMES libmyelin.so myelin.dll | |||
PATHS ${__found_trt_root}/lib | |||
) | |||
message( | |||
STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||
endif() | |||
add_library(libmyelin_pattern_library STATIC IMPORTED) | |||
set_target_properties(libmyelin_pattern_library | |||
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY}) | |||
else() | |||
find_library( | |||
LIBMYELIN_SHARED | |||
NAMES libmyelin.so myelin.dll | |||
PATHS ${__found_trt_root}/lib) | |||
if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||
endif() | |||
add_library(libmyelin SHARED IMPORTED) | |||
set_target_properties(libmyelin PROPERTIES | |||
IMPORTED_LOCATION ${LIBMYELIN_SHARED} | |||
) | |||
if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||
else() | |||
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||
endif() | |||
add_library(libmyelin SHARED IMPORTED) | |||
set_target_properties(libmyelin PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_SHARED}) | |||
endif() | |||
endif() |
@@ -1,17 +1,26 @@ | |||
include(ExternalProject) | |||
include(GNUInstallDirs) | |||
set(ZMQ_DIR ${PROJECT_SOURCE_DIR}/third_party/libzmq CACHE STRING "ZMQ directory") | |||
set(ZMQ_DIR | |||
${PROJECT_SOURCE_DIR}/third_party/libzmq | |||
CACHE STRING "ZMQ directory") | |||
set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq) | |||
set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a) | |||
ExternalProject_add( | |||
zmq | |||
SOURCE_DIR ${ZMQ_DIR} | |||
PREFIX ${ZMQ_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} -DWITH_PERF_TOOL=OFF -DZMQ_BUILD_TESTS=OFF -DENABLE_CPACK=OFF -DENABLE_CURVE=OFF | |||
BUILD_BYPRODUCTS ${ZMQ_LIB} | |||
) | |||
ExternalProject_Add( | |||
zmq | |||
SOURCE_DIR ${ZMQ_DIR} | |||
PREFIX ${ZMQ_BUILD_DIR} | |||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||
-DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} | |||
-DWITH_PERF_TOOL=OFF | |||
-DZMQ_BUILD_TESTS=OFF | |||
-DENABLE_CPACK=OFF | |||
-DENABLE_CURVE=OFF | |||
BUILD_BYPRODUCTS ${ZMQ_LIB}) | |||
set(ZMQ_INC ${ZMQ_BUILD_DIR}/include) | |||
include_directories(${ZMQ_INC}) | |||
@@ -19,8 +28,5 @@ file(MAKE_DIRECTORY ${ZMQ_INC}) | |||
add_library(libzmq STATIC IMPORTED GLOBAL) | |||
add_dependencies(libzmq zmq) | |||
set_target_properties( | |||
libzmq PROPERTIES | |||
IMPORTED_LOCATION ${ZMQ_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC} | |||
) | |||
set_target_properties(libzmq PROPERTIES IMPORTED_LOCATION ${ZMQ_LIB} | |||
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC}) |
@@ -4,66 +4,61 @@ set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) | |||
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/) | |||
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn) | |||
add_custom_command( | |||
OUTPUT | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||
tmp_unuse.log --write-cppjson ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||
VERBATIM | |||
) | |||
list(APPEND OPR_PARAM_DEFS_OUTS | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
COMMAND | |||
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||
${OPR_PARAM_DEFS_SRCS} tmp_unuse.log --write-cppjson | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||
) | |||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||
VERBATIM) | |||
list(APPEND OPR_PARAM_DEFS_OUTS ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h) | |||
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | |||
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | |||
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common) | |||
add_custom_command( | |||
OUTPUT | |||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||
--enumv ${OPR_PARAM_DEFS_SRCS} | |||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||
VERBATIM | |||
) | |||
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||
COMMAND | |||
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} --enumv | |||
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||
VERBATIM) | |||
list(APPEND OPR_PARAM_DEFS_OUTS | |||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||
) | |||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh) | |||
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | |||
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") | |||
install( | |||
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn | |||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||
FILES_MATCHING | |||
PATTERN "*.h") | |||
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | |||
add_library(opr_param_defs INTERFACE) | |||
target_include_directories(opr_param_defs | |||
INTERFACE | |||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||
) | |||
foreach (INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||
target_include_directories(opr_param_defs | |||
INTERFACE $<BUILD_INTERFACE:${INCPATH}> | |||
) | |||
INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) | |||
foreach(INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||
target_include_directories(opr_param_defs INTERFACE $<BUILD_INTERFACE:${INCPATH}>) | |||
endforeach() | |||
add_dependencies(opr_param_defs _opr_param_defs) | |||
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | |||
if(MGE_WITH_CUDA) | |||
add_library(cutlass INTERFACE) | |||
target_include_directories(cutlass | |||
INTERFACE | |||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||
add_library(cutlass INTERFACE) | |||
target_include_directories( | |||
cutlass | |||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||
endif() | |||
if(MGE_WITH_TEST) | |||
add_subdirectory(test) | |||
add_subdirectory(test) | |||
endif() | |||
add_subdirectory(src) | |||
@@ -1,6 +1,8 @@ | |||
add_library(atlas-stub STATIC src/libatlas-wrap.cpp) | |||
target_include_directories(atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||
target_include_directories( | |||
atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||
install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS}) | |||
add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp) | |||
target_include_directories(acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||
target_include_directories( | |||
acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) |
@@ -1,26 +1,27 @@ | |||
file (GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||
file (GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||
file(GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||
file(GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||
if(MGE_WITH_CUDA_STUB) | |||
list(APPEND STUB_SRC ${CUDA_STUB}) | |||
list(APPEND STUB_SRC ${CUDA_STUB}) | |||
endif() | |||
if(MGE_WITH_NVRTC_STUB) | |||
list(APPEND STUB_SRC ${NVRTC_STUB}) | |||
list(APPEND STUB_SRC ${NVRTC_STUB}) | |||
endif() | |||
if(MSVC OR WIN32) | |||
add_library (cuda-stub STATIC ${STUB_SRC}) | |||
add_library(cuda-stub STATIC ${STUB_SRC}) | |||
else() | |||
add_library (cuda-stub SHARED ${STUB_SRC}) | |||
add_library(cuda-stub SHARED ${STUB_SRC}) | |||
endif() | |||
set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub) | |||
target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL) | |||
if (MSVC OR WIN32) | |||
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||
if(MSVC OR WIN32) | |||
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||
else() | |||
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||
endif() | |||
target_include_directories(cuda-stub PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||
install (TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) | |||
target_include_directories(cuda-stub | |||
PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||
install(TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) |
@@ -12,6 +12,7 @@ | |||
#pragma once | |||
#include "megbrain_build_config.h" | |||
#include "megdnn/oprs/base.h" | |||
#if MGB_ENABLE_GETENV | |||
#define MGB_GETENV ::std::getenv | |||
@@ -36,6 +37,11 @@ bool has_available_algo(Opr* opr, Args&&... args) { | |||
return !all_algos.empty(); | |||
} | |||
template <class Opr, typename... Args> | |||
bool has_no_naive_heuristic_algo(Opr* opr, Args&&... args) { | |||
auto&& algo = opr->get_algorithm_info_heuristic(std::forward<Args>(args)...); | |||
return !static_cast<bool>(algo.attribute & detail::Algorithm::Attribute::NAIVE); | |||
} | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -1936,6 +1936,119 @@ protected: | |||
const TensorLayout& grad_s, size_t workspace_in_bytes); | |||
}; | |||
class LayerNormBase : public OperatorBase { | |||
DEF_OPR_IMPL_CTOR(LayerNormBase, OperatorBase); | |||
DEF_OPR_PARAM(LayerNorm); | |||
protected: | |||
void deduce_layout_fwd( | |||
const TensorLayout& data, const TensorLayout& weight, | |||
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||
TensorLayout& rstd); | |||
void check_layout_fwd( | |||
const TensorLayout& data, const TensorLayout& weight, | |||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||
const TensorLayout& rstd); | |||
}; | |||
class LayerNormForward : public LayerNormBase { | |||
DEF_OPR_IMPL(LayerNormForward, LayerNormBase, 3, 3); | |||
public: | |||
virtual void exec( | |||
_megdnn_tensor_in data, _megdnn_tensor_in weight, _megdnn_tensor_in bias, | |||
_megdnn_tensor_out dst, _megdnn_tensor_out mean, _megdnn_tensor_out rstd, | |||
_megdnn_workspace workspace) = 0; | |||
void deduce_layout( | |||
const TensorLayout& data, const TensorLayout& weight, | |||
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||
TensorLayout& rstd); | |||
virtual size_t get_workspace_in_bytes( | |||
const TensorLayout& data, const TensorLayout& weight, | |||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||
const TensorLayout& rstd) = 0; | |||
protected: | |||
void check_exec( | |||
const TensorLayout& data, const TensorLayout& weight, | |||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||
const TensorLayout& rstd, size_t workspace_in_bytes); | |||
}; | |||
using LayerNorm = LayerNormForward; | |||
class LayerNormBackward : public LayerNormBase { | |||
DEF_OPR_IMPL(LayerNormBackward, LayerNormBase, 5, 3); | |||
public: | |||
virtual void exec( | |||
_megdnn_tensor_in diff, _megdnn_tensor_in data, _megdnn_tensor_in weight, | |||
_megdnn_tensor_in mean, _megdnn_tensor_in rstd, _megdnn_tensor_out ddata, | |||
_megdnn_tensor_out dweight, _megdnn_tensor_out dbias, | |||
_megdnn_workspace workspace) = 0; | |||
void deduce_layout( | |||
const TensorLayout& diff, const TensorLayout& data, | |||
const TensorLayout& weight, const TensorLayout& mean, | |||
const TensorLayout& rstd, TensorLayout& ddata, TensorLayout& dweight, | |||
TensorLayout& dbias); | |||
virtual size_t get_workspace_in_bytes( | |||
const TensorLayout& diff, const TensorLayout& data, | |||
const TensorLayout& weight, const TensorLayout& mean, | |||
const TensorLayout& rstd, const TensorLayout& ddata, | |||
const TensorLayout& dweight, const TensorLayout& dbias) = 0; | |||
protected: | |||
void check_exec( | |||
const TensorLayout& diff, const TensorLayout& data, | |||
const TensorLayout& weight, const TensorLayout& mean, | |||
const TensorLayout& rstd, const TensorLayout& ddata, | |||
const TensorLayout& dweight, const TensorLayout& dbias, | |||
size_t workspace_in_bytes); | |||
}; | |||
class DropoutBase : public OperatorBase { | |||
DEF_OPR_IMPL_CTOR(DropoutBase, OperatorBase); | |||
DEF_OPR_PARAM(Dropout); | |||
}; | |||
class DropoutForward : public DropoutBase { | |||
DEF_OPR_IMPL(DropoutForward, DropoutBase, 1, 2); | |||
public: | |||
void deduce_layout(const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask); | |||
virtual void exec( | |||
_megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask, | |||
_megdnn_workspace workspace) = 0; | |||
virtual size_t get_workspace_in_bytes( | |||
const TensorLayout& inp, const TensorLayout& oup, | |||
const TensorLayout& mask) = 0; | |||
virtual size_t get_mask_size_in_bytes(const TensorLayout& inp) = 0; | |||
protected: | |||
void check_exec( | |||
const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask, | |||
size_t workspace_in_bytes); | |||
}; | |||
using Dropout = DropoutForward; | |||
class DropoutBackward : public DropoutBase { | |||
DEF_OPR_IMPL(DropoutBackward, DropoutBase, 2, 1); | |||
public: | |||
void deduce_layout( | |||
const TensorLayout& doup, const TensorLayout& mask, TensorLayout& dinp); | |||
virtual void exec( | |||
_megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp, | |||
_megdnn_workspace workspace) = 0; | |||
virtual size_t get_workspace_in_bytes( | |||
const TensorLayout& doup, const TensorLayout& mask, | |||
const TensorLayout& dinp) = 0; | |||
protected: | |||
void check_exec( | |||
const TensorLayout& doup, const TensorLayout& mask, | |||
const TensorLayout& dinp, size_t workspace_in_bytes); | |||
}; | |||
} // namespace megdnn | |||
#include "megdnn/internal/opr_header_epilogue.h" | |||
@@ -1212,3 +1212,15 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), | |||
member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | |||
) | |||
) | |||
(pdef('LayerNorm') | |||
.add_fields('bool', 'affine', 'true') | |||
.add_fields('float32', 'eps', '1e-5f') | |||
.add_fields('uint64', 'normalized_dim', '1') | |||
.add_fields('uint64', 'normalized_size', '1') | |||
) | |||
(pdef('Dropout') | |||
.add_fields('float32', 'drop_prob', '0') | |||
.add_fields('uint64', 'seed', '0') | |||
) |
@@ -5,168 +5,190 @@ file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) | |||
list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | |||
if(NOT ${MGE_ARCH} STREQUAL "naive") | |||
file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||
file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
if(${MGE_ARCH} STREQUAL "fallback") | |||
message(WARNING "build only with fallback") | |||
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||
file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
if(${MGE_ARCH} STREQUAL "fallback") | |||
message(WARNING "build only with fallback") | |||
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||
file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
if(NOT MSVC) | |||
file(GLOB_RECURSE SOURCES_ x86/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
endif() | |||
elseif(${MGE_ARCH} STREQUAL "armv7") | |||
file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
elseif(${MGE_ARCH} STREQUAL "aarch64") | |||
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
if(NOT MSVC) | |||
file(GLOB_RECURSE SOURCES_ x86/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
endif() | |||
elseif(${MGE_ARCH} STREQUAL "armv7") | |||
file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
elseif(${MGE_ARCH} STREQUAL "aarch64") | |||
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
endif() | |||
endif() | |||
if(MGE_WITH_MIDOUT_PROFILE) | |||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||
endif() | |||
############################################################################### | |||
# ###################################################################################### | |||
# HIP_COMPILE | |||
############################################################################### | |||
macro (HIP_COMPILE _hip_target _hip_objs) | |||
# Separate the sources from the options | |||
HIP_GET_SOURCES_AND_OPTIONS(_sources | |||
_cmake_options | |||
_hipcc_options | |||
_hcc_options | |||
_nvcc_options | |||
${ARGN}) | |||
HIP_PREPARE_TARGET_COMMANDS(${_hip_target} | |||
OBJ _generated_files _source_files ${_sources} ${_cmake_options} | |||
HIPCC_OPTIONS ${_hipcc_options} | |||
HCC_OPTIONS ${_hcc_options} | |||
NVCC_OPTIONS ${_nvcc_options}) | |||
if(_source_files) | |||
list(REMOVE_ITEM _sources ${_source_files}) | |||
endif() | |||
# ###################################################################################### | |||
macro(HIP_COMPILE _hip_target _hip_objs) | |||
# Separate the sources from the options | |||
hip_get_sources_and_options(_sources _cmake_options _hipcc_options _hcc_options | |||
_nvcc_options ${ARGN}) | |||
hip_prepare_target_commands( | |||
${_hip_target} | |||
OBJ | |||
_generated_files | |||
_source_files | |||
${_sources} | |||
${_cmake_options} | |||
HIPCC_OPTIONS | |||
${_hipcc_options} | |||
HCC_OPTIONS | |||
${_hcc_options} | |||
NVCC_OPTIONS | |||
${_nvcc_options}) | |||
if(_source_files) | |||
list(REMOVE_ITEM _sources ${_source_files}) | |||
endif() | |||
add_custom_target(${_hip_target}) | |||
add_custom_target(${_hip_target}) | |||
# set return value | |||
set(${_hip_objs} ${_generated_files}) | |||
# set return value | |||
set(${_hip_objs} ${_generated_files}) | |||
endmacro() | |||
if (MGE_WITH_ROCM) | |||
file (GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||
list (APPEND SOURCES ${SOURCES_}) | |||
# FIXME rocm may lose the first hip file, so currently we just create an | |||
# empty file to bypass this error. | |||
file(GLOB start.cpp.hip "" ) | |||
list(APPEND HIP_SOURCES start.cpp.hip) | |||
configure_file( | |||
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||
configure_file( | |||
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||
file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||
set(HIP_TARGET_NAME megdnn_hip_kernel) | |||
set(_HIPCC_OPTIONS "-fPIC") | |||
set(_HCC_OPTIONS "-fPIC") | |||
set(_NVCC_OPTIONS "-fPIC") | |||
list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||
HIP_INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/dnn | |||
${PROJECT_SOURCE_DIR}/dnn/include | |||
${PROJECT_BINARY_DIR}/dnn | |||
${PROJECT_BINARY_DIR}/genfiles | |||
${PROJECT_BINARY_DIR}/dnn/include | |||
${HIP_INCLUDE_DIR} | |||
${MIOPEN_INCLUDE_DIR} | |||
${ROCBLAS_INCLUDE_DIR} | |||
${ROCRAND_INCLUDE_DIR} | |||
${AMDOCL_INCLUDE_DIR}) | |||
hip_compile( | |||
${HIP_TARGET_NAME} HIPOBJS ${HIP_SOURCES} | |||
HIPCC_OPTIONS ${_HIPCC_OPTIONS} | |||
HCC_OPTIONS ${_HCC_OPTIONS} | |||
NVCC_OPTIONS ${_NVCC_OPTIONS}) | |||
list(APPEND SOURCES ${HIPOBJS}) | |||
endif () | |||
if(MGE_WITH_ROCM) | |||
file(GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
# FIXME rocm may lose the first hip file, so currently we just create an empty file to | |||
# bypass this error. | |||
file(GLOB start.cpp.hip "") | |||
list(APPEND HIP_SOURCES start.cpp.hip) | |||
configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||
configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||
file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||
set(HIP_TARGET_NAME megdnn_hip_kernel) | |||
set(_HIPCC_OPTIONS "-fPIC") | |||
set(_HCC_OPTIONS "-fPIC") | |||
set(_NVCC_OPTIONS "-fPIC") | |||
list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||
hip_include_directories( | |||
${PROJECT_SOURCE_DIR}/dnn | |||
${PROJECT_SOURCE_DIR}/dnn/include | |||
${PROJECT_BINARY_DIR}/dnn | |||
${PROJECT_BINARY_DIR}/genfiles | |||
${PROJECT_BINARY_DIR}/dnn/include | |||
${HIP_INCLUDE_DIR} | |||
${MIOPEN_INCLUDE_DIR} | |||
${ROCBLAS_INCLUDE_DIR} | |||
${ROCRAND_INCLUDE_DIR} | |||
${AMDOCL_INCLUDE_DIR}) | |||
hip_compile( | |||
${HIP_TARGET_NAME} | |||
HIPOBJS | |||
${HIP_SOURCES} | |||
HIPCC_OPTIONS | |||
${_HIPCC_OPTIONS} | |||
HCC_OPTIONS | |||
${_HCC_OPTIONS} | |||
NVCC_OPTIONS | |||
${_NVCC_OPTIONS}) | |||
list(APPEND SOURCES ${HIPOBJS}) | |||
endif() | |||
if(MGE_WITH_CUDA) | |||
file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||
set(CUTLASS_GEN_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||
set(CUTLASS_SOURCES "") | |||
function(gen_cutlass_kimpl op type gen_files) | |||
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||
set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||
execute_process( | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations ${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||
RESULT_VARIABLE gen_cutlass_result | |||
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||
) | |||
if (NOT gen_cutlass_result EQUAL 0) | |||
message(FATAL_ERROR "Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log") | |||
endif() | |||
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||
if (NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||
endif() | |||
endforeach() | |||
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" "${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||
endforeach() | |||
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||
set(${gen_files} "${${gen_files}}" PARENT_SCOPE) | |||
endfunction() | |||
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||
list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||
list(APPEND SOURCES ${CUSOURCES}) | |||
file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||
set(CUTLASS_GEN_SCRIPT | |||
${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||
set(CUTLASS_SOURCES "") | |||
# gen_cutlass_kimpl(<op> <type> <gen_files>)
#
# Runs the CUTLASS generator script at configure time for one (op, type) pair
# and appends the resulting .cu files to the list variable named by <gen_files>
# in the caller's scope.
#
# op        - value passed to the script's --operations option
# type      - value passed to the script's --type option
# gen_files - NAME of a list variable in the calling scope to append to
#
# The script writes into a temporary "<op>_<type>.stage" directory; the final
# "<op>_<type>" directory is then synchronised with it. Files are copied with
# copy_if_different, presumably so unchanged kernels keep their timestamps.
# Reads CUTLASS_GEN_DIR, CUTLASS_GEN_SCRIPT and
# PYTHON3_EXECUTABLE_WITHOUT_VERSION from the enclosing scope.
function(gen_cutlass_kimpl op type gen_files) | |||
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||
# Re-run the configure step whenever the generator script itself changes.
set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||
# Start from an empty staging directory; the final directory is kept as-is.
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||
# Generate into the staging directory; stdout and stderr both go to the same
# gen_cutlass.log inside the final directory.
execute_process( | |||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations | |||
${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||
RESULT_VARIABLE gen_cutlass_result | |||
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log) | |||
# A non-zero exit status from the generator aborts configuration.
if(NOT gen_cutlass_result EQUAL 0) | |||
message( | |||
FATAL_ERROR | |||
"Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log" | |||
) | |||
endif() | |||
# Remove .cu files left in the final directory that this run did not produce.
file( | |||
GLOB CUTLASS_GEN_FILES | |||
RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" | |||
"${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||
if(NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||
endif() | |||
endforeach() | |||
# Copy each staged .cu file into the final directory, only if it differs.
file( | |||
GLOB CUTLASS_GEN_FILES | |||
RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" | |||
"${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||
execute_process( | |||
COMMAND ${CMAKE_COMMAND} -E copy_if_different | |||
"${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||
endforeach() | |||
# Drop the staging directory, then collect the final .cu files.
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||
# Append to the caller's list and export the updated value to the parent scope.
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||
set(${gen_files} | |||
"${${gen_files}}" | |||
PARENT_SCOPE) | |||
endfunction() | |||
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||
list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||
list(APPEND SOURCES ${CUSOURCES}) | |||
endif() | |||
if(MGE_WITH_ATLAS) | |||
file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||
file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||
list(APPEND SOURCES ${SOURCES_}) | |||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||
endif() | |||
add_definitions(${LIBMEGDNN_DEF}) | |||
@@ -174,81 +196,85 @@ add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) | |||
target_link_libraries(megdnn PUBLIC opr_param_defs) | |||
if(MGE_WITH_CUDA) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||
endif() | |||
if(MGE_WITH_ROCM) | |||
target_include_directories(megdnn PUBLIC | |||
${HIP_INCLUDE_DIR} | |||
${MIOPEN_INCLUDE_DIR} | |||
${ROCBLAS_INCLUDE_DIR} | |||
${ROCRAND_INCLUDE_DIR} | |||
${AMDOCL_INCLUDE_DIR}) | |||
target_link_directories(megdnn PUBLIC | |||
${HIP_LIBRARY_DIR} | |||
${MIOPEN_LIBRARY_DIR} | |||
${ROCBLAS_LIBRARY_DIR} | |||
${ROCRAND_LIBRARY_DIR} | |||
${AMDOCL_LIBRARY_DIR}) | |||
target_include_directories( | |||
megdnn PUBLIC ${HIP_INCLUDE_DIR} ${MIOPEN_INCLUDE_DIR} ${ROCBLAS_INCLUDE_DIR} | |||
${ROCRAND_INCLUDE_DIR} ${AMDOCL_INCLUDE_DIR}) | |||
target_link_directories( | |||
megdnn | |||
PUBLIC | |||
${HIP_LIBRARY_DIR} | |||
${MIOPEN_LIBRARY_DIR} | |||
${ROCBLAS_LIBRARY_DIR} | |||
${ROCRAND_LIBRARY_DIR} | |||
${AMDOCL_LIBRARY_DIR}) | |||
endif() | |||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") | |||
if(MGE_ENABLE_CPUINFO) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||
endif() | |||
if(${MGE_ARCH} STREQUAL "x86_64" | |||
OR ${MGE_ARCH} STREQUAL "i386" | |||
OR ${MGE_ARCH} STREQUAL "armv7" | |||
OR ${MGE_ARCH} STREQUAL "aarch64") | |||
if(MGE_ENABLE_CPUINFO) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||
endif() | |||
endif() | |||
target_include_directories(megdnn | |||
PUBLIC | |||
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||
PRIVATE | |||
${PROJECT_SOURCE_DIR}/dnn | |||
${PROJECT_SOURCE_DIR}/third_party/midout/src | |||
) | |||
target_include_directories( | |||
megdnn | |||
PUBLIC $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||
PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) | |||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*") | |||
install( | |||
DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include | |||
DESTINATION . | |||
FILES_MATCHING | |||
PATTERN "*.h*") | |||
if(CXX_SUPPORT_WCLASS_MEMACCESS) | |||
if(MGE_WITH_CUDA) | |||
target_compile_options(megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||
else() | |||
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||
endif() | |||
if(MGE_WITH_CUDA) | |||
target_compile_options( | |||
megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||
else() | |||
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||
endif() | |||
endif() | |||
target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF}) | |||
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | |||
if (BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||
else() | |||
target_link_libraries(megdnn PRIVATE dnnl) | |||
endif() | |||
if(BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||
else() | |||
target_link_libraries(megdnn PRIVATE dnnl) | |||
endif() | |||
endif() | |||
if (BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||
if(BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||
else() | |||
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||
endif() | |||
if (MGE_WITH_ROCM) | |||
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||
endif () | |||
if(MGE_WITH_ROCM) | |||
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||
endif() | |||
if(MGE_WITH_ATLAS) | |||
if (BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||
else() | |||
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||
endif() | |||
if(BUILD_SHARED_LIBS) | |||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||
else() | |||
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||
endif() | |||
endif() | |||
if(CMAKE_THREAD_LIBS_INIT) | |||
target_link_libraries(megdnn PRIVATE Threads::Threads) | |||
target_link_libraries(megdnn PRIVATE Threads::Threads) | |||
endif() | |||
install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS}) |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1(2); | |||
INSTANTIATION_CONV_S1_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_NO_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2(5); | |||
INSTANTIATION_CONV_S2_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_NO_BIAS(2); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1(5); | |||
INSTANTIATION_CONV_S1_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_NO_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2(2); | |||
INSTANTIATION_CONV_S2_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_NO_BIAS(3); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1(3); | |||
INSTANTIATION_CONV_S1_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_NO_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2(7); | |||
INSTANTIATION_CONV_S2_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_NO_BIAS(5); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1(7); | |||
INSTANTIATION_CONV_S1_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||
INSTANTIATION_CONV_S1_NO_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2(3); | |||
INSTANTIATION_CONV_S2_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||
INSTANTIATION_CONV_S2_NO_BIAS(7); | |||
// vim: syntax=cpp.doxygen |
@@ -469,9 +469,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | |||
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | |||
#define INSTANTIATION_CONV_S1(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||
FOR_OP(filter_size, BiasMode::BIAS) | |||
#define INSTANTIATION_CONV_S1_NO_BIAS(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::NO_BIAS) | |||
// vim: syntax=cpp.doxygen | |||
#define INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define INSTANTIATION_CONV_S1_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||
// vim: syntax=cpp.doxygen |
@@ -550,9 +550,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | |||
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | |||
#define INSTANTIATION_CONV_S2(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||
FOR_OP(filter_size, BiasMode::BIAS) | |||
#define INSTANTIATION_CONV_S2_NO_BIAS(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::NO_BIAS) | |||
// vim: syntax=cpp.doxygen | |||
#define INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define INSTANTIATION_CONV_S2_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV(2, 1); | |||
INSTANCE_CONV_BIAS(2, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(2, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV(2, 2); | |||
INSTANCE_CONV_BIAS(2, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(2, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV(3, 1); | |||
INSTANCE_CONV_BIAS(3, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(3, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2.cpp | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -11,4 +11,5 @@ | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV(3, 2); | |||
INSTANCE_CONV_BIAS(3, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(3, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BIAS(5, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(5, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BIAS(5, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(5, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BIAS(7, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(7, 1); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BIAS(7, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||
INSTANCE_CONV_NO_BIAS(7, 2); | |||
// vim: syntax=cpp.doxygen |
@@ -928,9 +928,11 @@ void fp32_direct_nchw_nchw44::conv_direct_fp32_nchw_nchw44( | |||
INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \ | |||
INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>) | |||
#define INSTANCE_CONV(filter, stride) \ | |||
FOR_OP(stride, filter, BiasMode::NO_BIAS) \ | |||
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||
FOR_OP(stride, filter, BiasMode::BIAS) | |||
#define INSTANCE_CONV_NO_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::NO_BIAS) | |||
#define INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(filter, stride) \ | |||
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define INSTANCE_CONV_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::BIAS) | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.cpp | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -265,7 +265,8 @@ void conv_direct_sdot_int8_nchw44( | |||
#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | |||
template void \ | |||
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||
dst_type, stride, bias_mode, Op, filter_size>( \ | |||
dst_type * dst, const int oh, const int ow, const int8_t* src, \ | |||
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | |||
const int oh_size, const int oc, const int ic, const Op& op); | |||
@@ -284,22 +285,6 @@ void conv_direct_sdot_int8_nchw44( | |||
FOR_OP(stride, i, BiasMode::NO_BIAS) \ | |||
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define FOR_FILTER(stride) \ | |||
FOR_BIAS(stride, 2) \ | |||
FOR_BIAS(stride, 3) \ | |||
FOR_BIAS(stride, 5) \ | |||
FOR_BIAS(stride, 7) | |||
FOR_FILTER(1) | |||
#undef FOR_STRIDE | |||
#undef FOR_FILTER | |||
#undef FOR_IC | |||
#undef FOR_BIAS | |||
#undef FOR_NONLINEAR | |||
#undef FOR_REMAIN | |||
#undef INSTANTIATION | |||
} // namespace direct_dotprod_nchw44 | |||
} // namespace arm_common | |||
} // namespace megdnn |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(1, 2); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(1, 3); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(1, 5); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(1, 7); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.cpp | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -266,7 +266,8 @@ void conv_direct_sdot_int8_nchw44( | |||
#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | |||
template void \ | |||
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||
dst_type, stride, bias_mode, Op, filter_size>( \ | |||
dst_type * dst, const int oh, const int ow, const int8_t* src, \ | |||
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | |||
const int oh_size, const int oc, const int ic, const Op& op); | |||
@@ -285,22 +286,6 @@ void conv_direct_sdot_int8_nchw44( | |||
FOR_OP(stride, i, BiasMode::NO_BIAS) \ | |||
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define FOR_FILTER(stride) \ | |||
FOR_BIAS(stride, 2) \ | |||
FOR_BIAS(stride, 3) \ | |||
FOR_BIAS(stride, 5) \ | |||
FOR_BIAS(stride, 7) | |||
FOR_FILTER(2) | |||
#undef FOR_STRIDE | |||
#undef FOR_FILTER | |||
#undef FOR_IC | |||
#undef FOR_BIAS | |||
#undef FOR_NONLINEAR | |||
#undef FOR_REMAIN | |||
#undef INSTANTIATION | |||
} // namespace direct_dotprod_nchw44 | |||
} // namespace arm_common | |||
} // namespace megdnn |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(2, 2); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(2, 3); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
FOR_BIAS(2, 5); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||
// Everything below is compiled only when MGB_ENABLE_DOT evaluates to non-zero. | |||
#if MGB_ENABLE_DOT | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
// NOTE(review): FOR_BIAS is not defined in this file (presumably it comes from | |||
// the header included above); the arguments look like (stride, filter size), | |||
// which would match the "s2_7x7" file name - confirm against the macro. | |||
FOR_BIAS(2, 7); | |||
#endif | |||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
@@ -45,4 +45,4 @@ public: | |||
} // namespace arm_common | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen | |||
// vim: syntax=cpp.doxygen |
@@ -13,336 +13,9 @@ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | |||
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | |||
namespace megdnn { | |||
namespace arm_common { | |||
namespace { | |||
/** | |||
* @brief core code for the calculation pattern | |||
* | |||
* The primary template only declares impl(); the bodies are supplied by the | |||
* partial specializations on (c_dim, stride). | |||
* | |||
* @tparam src_idx is offset of src reg (the specializations index src with | |||
* (i + src_idx) % 8) | |||
* @tparam weight_idx is offset of weight reg | |||
* @tparam c_dim is output channel (number of rows of c that are updated) | |||
* @tparam stride value the specialization is selected by | |||
* @tparam T output regs type | |||
* @tparam T2 src regs type | |||
* @tparam T3 weight regs type | |||
* @tparam T4 temp regs type | |||
*/ | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3, typename T4> | |||
struct ShiftCalHelper { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||
}; | |||
// Convenience wrapper around ShiftCalHelper::impl(c, src, weight, temp): | |||
// callers spell out only src_idx, weight_idx, c_dim and stride, while the | |||
// register-array types T, T2, T3 and T4 are deduced from the arguments. | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3, typename T4> | |||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||
c, src, weight, temp); | |||
} | |||
// Overload without a temp argument: forwards to the three-argument | |||
// ShiftCalHelper::impl, using int as the placeholder T4 type. | |||
// NOTE(review): the specializations visible in this file only declare the | |||
// three-argument impl and never define it, so instantiating this overload | |||
// would presumably fail to link - confirm before using it. | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3> | |||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||
c, src, weight); | |||
} | |||
// Specialization for c_dim == 2, stride == 1: updates two accumulator rows | |||
// (c[0][i] and c[1][i]) for the eight columns i = 0..7. Column i combines | |||
// src[(i + src_idx) % 8] with weight[row][weight_idx] through vdotq_s32_h and | |||
// stores the result back into c[row][i]. The four temp registers are passed in | |||
// rotation (temp[0], temp[1], temp[2], temp[3], then temp[0] again, ...). | |||
template < | |||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||
c[0][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||
c[1][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||
c[0][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||
c[1][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||
c[0][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||
c[1][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||
c[0][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||
c[1][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||
c[0][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||
c[1][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||
c[0][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||
c[1][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||
c[0][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||
c[1][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||
c[0][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||
c[1][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||
} | |||
// Declared only; no definition of this overload is visible in this file. | |||
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||
}; | |||
// Specialization for c_dim == 1, stride == 1: updates the single accumulator | |||
// row c[0][i] for the eight columns i = 0..7. Column i combines | |||
// src[(i + src_idx) % 8] with weight[0][weight_idx] through vdotq_s32_h and | |||
// passes temp[i % 4] as the fourth argument. | |||
template < | |||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||
c[0][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||
c[0][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||
c[0][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||
c[0][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||
c[0][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||
c[0][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||
c[0][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||
c[0][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||
} | |||
// Declared only; no definition of this overload is visible in this file. | |||
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||
}; | |||
// Kernel specialization for filter size 1 and stride 1 (see the local | |||
// constants filter_height = 1, filter_width = 4, stride = 1). For every input | |||
// channel it loads the weights and eight source registers of the single filter | |||
// row and accumulates into c via cal_helper; the accumulators are then written | |||
// to dst_ptr by store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 1; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 2 and stride 1 (see the local | |||
// constants filter_height = 2, filter_width = 4, stride = 1). For every input | |||
// channel it processes the two filter rows in turn: each row loads its weights | |||
// and eight source registers, then accumulates into c via cal_helper. The | |||
// accumulators are finally written to dst_ptr by | |||
// store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 2; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// Filter row 0. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 1. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 3 and stride 1 (see the local | |||
// constants filter_height = 3, filter_width = 4, stride = 1). For every input | |||
// channel it processes the three filter rows in turn: each row loads its | |||
// weights and eight source registers, then accumulates into c via cal_helper. | |||
// The accumulators are finally written to dst_ptr by | |||
// store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 3; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// Filter row 0. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 1. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 2. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 5 and stride 1 (see the local | |||
// constants filter_height = 5, filter_width = 8, stride = 1). Each filter row | |||
// uses weight_reg = 2 weight registers per row of dot4_weight, so a row is | |||
// accumulated in two cal_helper passes. The accumulators are finally written | |||
// to dst_ptr by store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 5; | |||
constexpr int filter_width = 8; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 2; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// cb(step) handles filter row `step`: it loads the row's weights and eight | |||
// source registers and accumulates with weight register 0; it then issues a | |||
// second load_helper<4, ...> starting src_reg * pack_iw_len further on and | |||
// accumulates with weight register 1 using src_idx = 4. | |||
// NOTE(review): presumably the second load overwrites src[0..3], so that | |||
// src[(i + 4) % 8] forms a window shifted by four registers - confirm | |||
// against load_helper. | |||
#define cb(step) \ | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Expand cb once per filter row (five rows here). | |||
UNROLL_CALL_RAW(5, cb); | |||
#undef cb | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 7 and stride 1 (see the local | |||
// constants filter_height = 7, filter_width = 8, stride = 1). Each filter row | |||
// uses weight_reg = 2 weight registers per row of dot4_weight, so a row is | |||
// accumulated in two cal_helper passes. The accumulators are finally written | |||
// to dst_ptr by store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 7; | |||
constexpr int filter_width = 8; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 2; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// cb(step) handles filter row `step`: it loads the row's weights and eight | |||
// source registers and accumulates with weight register 0; it then issues a | |||
// second load_helper<4, ...> starting src_reg * pack_iw_len further on and | |||
// accumulates with weight register 1 using src_idx = 4. | |||
// NOTE(review): presumably the second load overwrites src[0..3], so that | |||
// src[(i + 4) % 8] forms a window shifted by four registers - confirm | |||
// against load_helper. | |||
#define cb(step) \ | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Expand cb once per filter row (seven rows here). | |||
UNROLL_CALL_RAW(7, cb); | |||
#undef cb | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
} // namespace | |||
namespace int8_direct_nchw_nchw44 { | |||
/** | |||
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | |||
@@ -444,115 +117,9 @@ void pack_nchw_src_for_nchw44_conv<1>( | |||
} | |||
} | |||
// Stride-1 driver: tiles the output and dispatches each tile to a | |||
// KerNeonXXs2NchwNchw44 kernel. | |||
// | |||
// Output channels are processed big_oc_step (8) at a time; whatever is left | |||
// after the last full group of 8 is handled by one pass of the oc_step (4) | |||
// kernel. Output columns are processed ow_step (8) at a time; the trailing | |||
// ow_remain columns of each row go to a kernel instantiated with | |||
// remain_w == ow_remain, chosen once up front by the switch below. | |||
// | |||
// temp is unused in this specialization. | |||
// NOTE(review): the oc tail is covered by a single oc_step-wide kernel call, | |||
// so this presumably relies on oc being a multiple of 4 - confirm with callers. | |||
template <BiasMode bias_mode, typename Op, size_t filter_size> | |||
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||
static void impl( | |||
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||
int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||
const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||
MEGDNN_MARK_USED_VAR(temp); | |||
constexpr int stride = 1; | |||
constexpr size_t fh = filter_size; | |||
// Filter width rounded up to a multiple of 4. | |||
constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||
constexpr size_t ic_step = 1; | |||
constexpr size_t big_oc_step = 8; | |||
constexpr size_t oc_step = 4; | |||
constexpr size_t ih_step = 1; | |||
constexpr size_t oh_step = 1; | |||
constexpr size_t ow_step = 8; | |||
constexpr size_t stride_h = stride; | |||
constexpr size_t stride_w = stride; | |||
constexpr int pack_iw_len = 16; | |||
const size_t img_stride = oh * ow; | |||
// Split ow into full ow_step tiles plus a tail of ow_remain columns. | |||
const size_t ow_end = ow / ow_step * ow_step; | |||
const size_t ow_remain = ow - ow_end; | |||
// Split oc into full big_oc_step groups plus a tail of oc_remain channels. | |||
const size_t oc_end = oc / big_oc_step * big_oc_step; | |||
const size_t oc_remain = oc - oc_end; | |||
const int ld_dst_oc = oc_step * img_stride; | |||
using remain_fun = std::function<void( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, | |||
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||
int ld_dst_oc, const Op& op)>; | |||
remain_fun kern_big_oc_remain = nullptr; | |||
remain_fun kern_small_oc_remain = nullptr; | |||
// Pick the tail kernels matching ow_remain (cases 0..7). | |||
switch (ow_remain) { | |||
#define cb(step) \ | |||
case step: \ | |||
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||
break; | |||
UNROLL_CALL_RAW(8, cb); | |||
// Drop the helper macro so it does not leak past this switch. | |||
#undef cb | |||
default: | |||
megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||
} | |||
// Full groups of big_oc_step output channels. | |||
for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||
KerNeonXXs2NchwNchw44< | |||
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||
impl(src + src_offset, filter + weight_offset, | |||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||
op); | |||
} | |||
// Trailing columns of this output row. | |||
if (ow_remain > 0) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||
kern_big_oc_remain( | |||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||
} | |||
} | |||
} | |||
// Remaining output channels, handled with the oc_step-wide kernel. | |||
if (oc_remain > 0) { | |||
size_t oc_idx = oc_end; | |||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||
KerNeonXXs2NchwNchw44< | |||
bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||
impl(src + src_offset, filter + weight_offset, | |||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||
op); | |||
} | |||
// Trailing columns of this output row. | |||
if (ow_remain > 0) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||
kern_small_oc_remain( | |||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||
} | |||
} | |||
} | |||
} | |||
}; | |||
#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | |||
template struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \ | |||
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||
#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | |||
INSTANCE_CONV_KERN_FUN( \ | |||
@@ -566,17 +133,10 @@ struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | |||
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
#define INSTANCE_CONV_KERN(stride) \ | |||
INSTANCE_BIAS_MODE_PARAM(stride, 1) \ | |||
INSTANCE_BIAS_MODE_PARAM(stride, 2) \ | |||
INSTANCE_BIAS_MODE_PARAM(stride, 3) \ | |||
INSTANCE_BIAS_MODE_PARAM(stride, 5) \ | |||
INSTANCE_BIAS_MODE_PARAM(stride, 7) | |||
INSTANCE_CONV_KERN(1); | |||
#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter) | |||
} // namespace int8_direct_nchw_nchw44 | |||
} // namespace arm_common | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,481 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | |||
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | |||
namespace megdnn { | |||
namespace arm_common { | |||
namespace { | |||
/** | |||
* @brief core code for the calculation pattern | |||
* | |||
* The primary template only declares impl(); the bodies are supplied by the | |||
* partial specializations on (c_dim, stride). | |||
* | |||
* @tparam src_idx is offset of src reg (the specializations index src with | |||
* (i + src_idx) % 8) | |||
* @tparam weight_idx is offset of weight reg | |||
* @tparam c_dim is output channel (number of rows of c that are updated) | |||
* @tparam stride value the specialization is selected by | |||
* @tparam T output regs type | |||
* @tparam T2 src regs type | |||
* @tparam T3 weight regs type | |||
* @tparam T4 temp regs type | |||
*/ | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3, typename T4> | |||
struct ShiftCalHelper { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||
}; | |||
// Convenience wrapper around ShiftCalHelper::impl(c, src, weight, temp): | |||
// callers spell out only src_idx, weight_idx, c_dim and stride, while the | |||
// register-array types T, T2, T3 and T4 are deduced from the arguments. | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3, typename T4> | |||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||
c, src, weight, temp); | |||
} | |||
// Overload without a temp argument: forwards to the three-argument | |||
// ShiftCalHelper::impl, using int as the placeholder T4 type. | |||
// NOTE(review): the specializations visible in this file only declare the | |||
// three-argument impl and never define it, so instantiating this overload | |||
// would presumably fail to link - confirm before using it. | |||
template < | |||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||
typename T3> | |||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||
c, src, weight); | |||
} | |||
// Specialization for c_dim == 2, stride == 1: updates two accumulator rows | |||
// (c[0][i] and c[1][i]) for the eight columns i = 0..7. Column i combines | |||
// src[(i + src_idx) % 8] with weight[row][weight_idx] through vdotq_s32_h and | |||
// stores the result back into c[row][i]. The four temp registers are passed in | |||
// rotation (temp[0], temp[1], temp[2], temp[3], then temp[0] again, ...). | |||
template < | |||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||
c[0][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||
c[1][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||
c[0][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||
c[1][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||
c[0][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||
c[1][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||
c[0][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||
c[1][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||
c[0][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||
c[1][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||
c[0][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||
c[1][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||
c[0][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||
c[1][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||
c[0][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||
c[1][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||
} | |||
// Declared only; no definition of this overload is visible in this file. | |||
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||
}; | |||
// Specialization for c_dim == 1, stride == 1: updates the single accumulator | |||
// row c[0][i] for the eight columns i = 0..7. Column i combines | |||
// src[(i + src_idx) % 8] with weight[0][weight_idx] through vdotq_s32_h and | |||
// passes temp[i % 4] as the fourth argument. | |||
template < | |||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||
c[0][0] = vdotq_s32_h( | |||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||
c[0][1] = vdotq_s32_h( | |||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||
c[0][2] = vdotq_s32_h( | |||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||
c[0][3] = vdotq_s32_h( | |||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||
c[0][4] = vdotq_s32_h( | |||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||
c[0][5] = vdotq_s32_h( | |||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||
c[0][6] = vdotq_s32_h( | |||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||
c[0][7] = vdotq_s32_h( | |||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||
} | |||
// Declared only; no definition of this overload is visible in this file. | |||
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||
}; | |||
// Kernel specialization for filter size 1 and stride 1 (see the local | |||
// constants filter_height = 1, filter_width = 4, stride = 1). For every input | |||
// channel it loads the weights and eight source registers of the single filter | |||
// row and accumulates into c via cal_helper; the accumulators are then written | |||
// to dst_ptr by store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 1; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 2 and stride 1 (see the local | |||
// constants filter_height = 2, filter_width = 4, stride = 1). For every input | |||
// channel it processes the two filter rows in turn: each row loads its weights | |||
// and eight source registers, then accumulates into c via cal_helper. The | |||
// accumulators are finally written to dst_ptr by | |||
// store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 2; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// Filter row 0. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 1. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Kernel specialization for filter size 3 and stride 1 (see the local | |||
// constants filter_height = 3, filter_width = 4, stride = 1). For every input | |||
// channel it processes the three filter rows in turn: each row loads its | |||
// weights and eight source registers, then accumulates into c via cal_helper. | |||
// The accumulators are finally written to dst_ptr by | |||
// store_ocx_ow8_remain_static_dt using op. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 3; | |||
constexpr int filter_width = 4; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 1; | |||
// Offset between consecutive input channels in the packed source. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Offset between consecutive oc_step-wide output-channel groups in the weights. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows by 8 columns, set up by init_ocx_ow8. | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// Filter row 0. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 1. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Filter row 2. | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Step to the weights of the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Partial specialization of KerNeonXXs2NchwNchw44 chosen when the filter-size | |||
// template argument is 5 and the last template argument (stride) is 1. | |||
// impl() accumulates over all `ic` input channels into int32x4_t accumulators and | |||
// then passes them, together with `op`, to store_ocx_ow8_remain_static_dt. | |||
// NOTE(review): the struct name contains "s2" although this specialization is the | |||
// stride-1 one; the name is shared with the primary template declared elsewhere. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||
// Parameters: | |||
//   src_ptr / weight_ptr / bias_ptr : inputs read by the load/init helpers below | |||
//   dst_ptr, ld_dst_oc              : forwarded to the store helper | |||
//   ic, ih, iw                      : used to derive the per-channel source stride | |||
//                                     (ih * iw * pack_iw_len) and the weight stride | |||
//   op                              : forwarded unchanged to the store helper | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
// Compile-time geometry. filter_width is 8, which equals 5 rounded up to a | |||
// multiple of 4, the same rounding the driver applies with | |||
// fw = (filter_size + 3) / 4 * 4. | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 5; | |||
constexpr int filter_width = 8; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 2; | |||
// Distance between two input channels in the source buffer. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Stride handed to load_helper for the weights; presumably the distance between | |||
// consecutive oc blocks of the packed filter. TODO confirm against load_helper. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows of 8 int32x4_t each, initialised by init_ocx_ow8 | |||
// (from bias_ptr, depending on bias_mode). | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
// One input channel per iteration (loop_ic_step == 1). | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// cb(step) processes filter row `step`: | |||
//   1. load weight_reg weight vectors per oc block from | |||
//      weight_ptr + step * filter_width * oc_step; | |||
//   2. load src_reg source vectors from row offset step * iw * pack_iw_len and | |||
//      run cal_helper<0, 0>; | |||
//   3. issue load_helper<4, 0, ...> into `src` again, reading from | |||
//      src_reg * pack_iw_len elements further along the row (presumably | |||
//      overwriting src[0..3]), and run cal_helper<4, 1>. | |||
// The order matters because `src` is reused between the two cal_helper calls. | |||
// NOTE(review): the meaning of the cal_helper/load_helper template indices is | |||
// defined outside this chunk; confirm before changing. | |||
#define cb(step) \ | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Presumably expands to cb(0) ... cb(4), one call per filter row; confirm | |||
// against the definition of UNROLL_CALL_RAW. | |||
UNROLL_CALL_RAW(5, cb); | |||
#undef cb | |||
// Step the weight pointer past this channel's filter_height * filter_width taps | |||
// (times oc_step) before handling the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
// Hand the accumulators and `op` to the store helper, which writes to dst_ptr | |||
// as dt_qint8 (per its template arguments). | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
// Partial specialization of KerNeonXXs2NchwNchw44 chosen when the filter-size | |||
// template argument is 7 and the last template argument (stride) is 1. | |||
// The body mirrors the filter-size-5 specialization; only filter_height and the | |||
// unroll count (7) differ. | |||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||
// Parameters: | |||
//   src_ptr / weight_ptr / bias_ptr : inputs read by the load/init helpers below | |||
//   dst_ptr, ld_dst_oc              : forwarded to the store helper | |||
//   ic, ih, iw                      : used to derive the per-channel source stride | |||
//                                     (ih * iw * pack_iw_len) and the weight stride | |||
//   op                              : forwarded unchanged to the store helper | |||
static void impl( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||
// Compile-time geometry. filter_width is 8, which equals 7 rounded up to a | |||
// multiple of 4, the same rounding the driver applies with | |||
// fw = (filter_size + 3) / 4 * 4. | |||
constexpr int stride = 1; | |||
constexpr int filter_height = 7; | |||
constexpr int filter_width = 8; | |||
constexpr int oc_step = 4; | |||
constexpr int loop_ic_step = 1; | |||
constexpr int simd_len = 16; | |||
constexpr int pack_iw_len = 16; | |||
constexpr int src_reg = 8; | |||
constexpr int weight_reg = 2; | |||
// Distance between two input channels in the source buffer. | |||
const int ic_stride = ih * iw * pack_iw_len; | |||
// Stride handed to load_helper for the weights; presumably the distance between | |||
// consecutive oc blocks of the packed filter. TODO confirm against load_helper. | |||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||
constexpr int c_dim = OCHelper<oc_block>::val; | |||
// Accumulators: c_dim rows of 8 int32x4_t each, initialised by init_ocx_ow8 | |||
// (from bias_ptr, depending on bias_mode). | |||
int32x4_t c[c_dim][8]; | |||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||
// One input channel per iteration (loop_ic_step == 1). | |||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||
int8x16_t src[src_reg]; | |||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||
int16x8_t temp_c[4]; | |||
// cb(step) processes filter row `step`: load the row's weights, load src_reg | |||
// source vectors and run cal_helper<0, 0>, then issue load_helper<4, 0, ...> | |||
// into `src` again from src_reg * pack_iw_len elements further along the row | |||
// (presumably overwriting src[0..3]) and run cal_helper<4, 1>. | |||
// The order matters because `src` is reused between the two cal_helper calls. | |||
// NOTE(review): the meaning of the cal_helper/load_helper template indices is | |||
// defined outside this chunk; confirm before changing. | |||
#define cb(step) \ | |||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||
// Presumably expands to cb(0) ... cb(6), one call per filter row; confirm | |||
// against the definition of UNROLL_CALL_RAW. | |||
UNROLL_CALL_RAW(7, cb); | |||
#undef cb | |||
// Step the weight pointer past this channel's filter_height * filter_width taps | |||
// (times oc_step) before handling the next input channel. | |||
weight_ptr += oc_step * filter_height * filter_width; | |||
} | |||
// Hand the accumulators and `op` to the store helper, which writes to dst_ptr | |||
// as dt_qint8 (per its template arguments). | |||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||
c, op, dst_ptr, ld_dst_oc); | |||
} | |||
}; | |||
} // namespace | |||
namespace int8_direct_nchw_nchw44 { | |||
/** | |||
* Stride-1 driver for the int8 NCHW -> NCHW44 direct convolution. | |||
* | |||
* Weight layout note kept from the original source: | |||
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh, fw/4, 4(oc)*4(fw)}; | |||
* packing interleaves two adjacent rows of the filter into one row. | |||
* NOTE(review): confirm that the row-interleaving remark applies to this stride-1 | |||
* variant; the packing routine itself is not visible here. | |||
* | |||
* impl() tiles the output as follows: | |||
*  - output channels in groups of big_oc_step (8); any remainder is handled by | |||
*    one pass with the oc_step (4) wide kernel. | |||
*    NOTE(review): this presumably requires oc to be a multiple of 4; confirm. | |||
*  - output rows one at a time (oh_step == 1); | |||
*  - output columns in tiles of ow_step (8); the last ow % 8 columns go through a | |||
*    kernel instantiated for exactly that remainder. | |||
* | |||
* Parameters: | |||
*  src, filter, bias : input buffers; offsets into them are computed per tile | |||
*  temp              : unused (marked with MEGDNN_MARK_USED_VAR) | |||
*  dst               : output buffer | |||
*  oc, ic            : output / input channel counts | |||
*  ih, iw, oh, ow    : input and output spatial sizes | |||
*  op                : post-process functor forwarded to the kernels | |||
*/ | |||
template <BiasMode bias_mode, typename Op, size_t filter_size> | |||
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||
static void impl( | |||
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||
int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||
const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||
MEGDNN_MARK_USED_VAR(temp); | |||
constexpr int stride = 1; | |||
constexpr size_t fh = filter_size; | |||
// Filter width rounded up to the next multiple of 4. | |||
constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||
constexpr size_t ic_step = 1; | |||
constexpr size_t big_oc_step = 8; | |||
constexpr size_t oc_step = 4; | |||
constexpr size_t ih_step = 1; | |||
constexpr size_t oh_step = 1; | |||
constexpr size_t ow_step = 8; | |||
constexpr size_t stride_h = stride; | |||
constexpr size_t stride_w = stride; | |||
constexpr int pack_iw_len = 16; | |||
const size_t img_stride = oh * ow; | |||
// Split the output width into full 8-wide tiles plus a remainder. | |||
const size_t ow_end = ow / ow_step * ow_step; | |||
const size_t ow_remain = ow - ow_end; | |||
// Split the output channels into full 8-channel groups plus a remainder. | |||
const size_t oc_end = oc / big_oc_step * big_oc_step; | |||
const size_t oc_remain = oc - oc_end; | |||
const int ld_dst_oc = oc_step * img_stride; | |||
using remain_fun = std::function<void( | |||
const int8_t* src_ptr, const int8_t* weight_ptr, | |||
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||
int ld_dst_oc, const Op& op)>; | |||
remain_fun kern_big_oc_remain = nullptr; | |||
remain_fun kern_small_oc_remain = nullptr; | |||
// Select the remainder kernels (8-oc and 4-oc variants) instantiated for | |||
// exactly ow_remain columns. | |||
switch (ow_remain) { | |||
#define cb(step) \ | |||
case step: \ | |||
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||
break; | |||
UNROLL_CALL_RAW(8, cb); | |||
// Undefine the helper macro right after its only use, as the kernel | |||
// specializations do, so `cb` does not leak into files including this header. | |||
#undef cb | |||
default: | |||
megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||
} | |||
// Full groups of big_oc_step output channels. | |||
for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||
// Full ow_step-wide tiles of this output row. | |||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||
KerNeonXXs2NchwNchw44< | |||
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||
impl(src + src_offset, filter + weight_offset, | |||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||
op); | |||
} | |||
// Trailing columns of this output row, starting at ow_end. | |||
if (ow_remain > 0) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||
kern_big_oc_remain( | |||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||
} | |||
} | |||
} | |||
// Remaining output channels (oc - oc_end), processed with the oc_step-wide | |||
// kernels starting at channel oc_end. | |||
if (oc_remain > 0) { | |||
size_t oc_idx = oc_end; | |||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||
KerNeonXXs2NchwNchw44< | |||
bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||
impl(src + src_offset, filter + weight_offset, | |||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||
op); | |||
} | |||
if (ow_remain > 0) { | |||
const size_t src_offset = | |||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||
ic_step * pack_iw_len; | |||
const size_t dst_offset = | |||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||
kern_small_oc_remain( | |||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||
} | |||
} | |||
} | |||
} | |||
}; | |||
// Explicitly instantiates ConvDiectStrideInt8NchwNchw44 for one combination of | |||
// (bias_mode, Op, filter_size, stride). The struct name is fully qualified, so the | |||
// macro does not depend on the namespace it is expanded in. | |||
#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | |||
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \ | |||
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||
// Repeats INSTANCE_CONV_KERN_FUN for the three post-process ops TypeCvtOp, ReluOp | |||
// and HSwishOp, each with dt_qint32 and dt_qint8 as template arguments. | |||
// MEGDNN_COMMA presumably expands to a comma so that the two template arguments | |||
// are not split into separate macro arguments; confirm against its definition. | |||
#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | |||
INSTANCE_CONV_KERN_FUN( \ | |||
stride, filter, bias_mode, TypeCvtOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \ | |||
INSTANCE_CONV_KERN_FUN( \ | |||
stride, filter, bias_mode, ReluOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \ | |||
INSTANCE_CONV_KERN_FUN( \ | |||
stride, filter, bias_mode, HSwishOp<dt_qint32 MEGDNN_COMMA dt_qint8>) | |||
// Repeats INSTANCE_OP_PARAM for the two bias modes NO_BIAS and | |||
// BROADCAST_CHANNEL_BIAS. | |||
#define INSTANCE_BIAS_MODE_PARAM(stride, filter) \ | |||
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | |||
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||
// Entry point used by the per-filter .cpp files: emits every explicit | |||
// instantiation (2 bias modes x 3 ops) for the given stride and filter size. | |||
#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter) | |||
} // namespace int8_direct_nchw_nchw44 | |||
} // namespace arm_common | |||
} // namespace megdnn | |||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,19 @@ | |||
/** | |||
* \file | |||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp | |||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||
* | |||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||
* | |||
* Unless required by applicable law or agreed to in writing, | |||
* software distributed under the License is distributed on an | |||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||
* implied. | |||
*/ | |||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h" | |||
// This translation unit only emits explicit template instantiations. | |||
using namespace megdnn; | |||
using namespace arm_common; | |||
// INSTANCE_CONV_KERN(stride, filter): instantiate the stride-1 kernels for filter | |||
// size 1. The macro presumably comes from the header included above; confirm. | |||
INSTANCE_CONV_KERN(1, 1); | |||
// vim: syntax=cpp.doxygen |