@@ -29,6 +29,7 @@ jobs: | |||||
uses: actions/checkout@v2 | uses: actions/checkout@v2 | ||||
- name: Checkout submodules | - name: Checkout submodules | ||||
run: | | run: | | ||||
apt update&&apt install ninja-build | |||||
./third_party/prepare.sh | ./third_party/prepare.sh | ||||
./third_party/install-mkl.sh | ./third_party/install-mkl.sh | ||||
- name: Build MegEngine | - name: Build MegEngine | ||||
@@ -57,6 +58,7 @@ jobs: | |||||
uses: actions/checkout@v2 | uses: actions/checkout@v2 | ||||
- name: Checkout submodules | - name: Checkout submodules | ||||
run: | | run: | | ||||
apt update&&apt install ninja-build | |||||
./third_party/prepare.sh | ./third_party/prepare.sh | ||||
./third_party/install-mkl.sh | ./third_party/install-mkl.sh | ||||
- name: Build MegEngine | - name: Build MegEngine | ||||
@@ -27,7 +27,8 @@ function build() { | |||||
-DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | -DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \ | ||||
-DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | -DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \ | ||||
-DMGE_WITH_TEST=ON \ | -DMGE_WITH_TEST=ON \ | ||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo | |||||
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ | |||||
-DMGE_WITH_CUSTOM_OP=ON | |||||
make -j$(($(nproc) * 2)) -I ${build_dir} | make -j$(($(nproc) * 2)) -I ${build_dir} | ||||
make develop | make develop | ||||
popd >/dev/null | popd >/dev/null | ||||
@@ -1,59 +1,56 @@ | |||||
# Copyright 2015 Google Inc. All rights reserved. | # Copyright 2015 Google Inc. All rights reserved. | ||||
# | # | ||||
# Licensed under the Apache License, Version 2.0 (the "License"); | |||||
# you may not use this file except in compliance with the License. | |||||
# You may obtain a copy of the License at | |||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this | |||||
# file except in compliance with the License. You may obtain a copy of the License at | |||||
# | # | ||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# http://www.apache.org/licenses/LICENSE-2.0 | |||||
# | # | ||||
# Unless required by applicable law or agreed to in writing, software | |||||
# distributed under the License is distributed on an "AS IS" BASIS, | |||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
# See the License for the specific language governing permissions and | |||||
# limitations under the License. | |||||
# Unless required by applicable law or agreed to in writing, software distributed under | |||||
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF | |||||
# ANY KIND, either express or implied. See the License for the specific language | |||||
# governing permissions and limitations under the License. | |||||
# General function to create FlatBuffer build rules for the given list of | |||||
# schemas. | |||||
# General function to create FlatBuffer build rules for the given list of schemas. | |||||
# | # | ||||
# flatbuffers_schemas: A list of flatbuffer schema files to process. | # flatbuffers_schemas: A list of flatbuffer schema files to process. | ||||
# | # | ||||
# schema_include_dirs: A list of schema file include directories, which will be | |||||
# passed to flatc via the -I parameter. | |||||
# schema_include_dirs: A list of schema file include directories, which will be passed | |||||
# to flatc via the -I parameter. | |||||
# | # | ||||
# custom_target_name: The generated files will be added as dependencies for a | |||||
# new custom target with this name. You should add that target as a dependency | |||||
# for your main target to ensure these files are built. You can also retrieve | |||||
# various properties from this target, such as GENERATED_INCLUDES_DIR, | |||||
# BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||||
# custom_target_name: The generated files will be added as dependencies for a new custom | |||||
# target with this name. You should add that target as a dependency for your main target | |||||
# to ensure these files are built. You can also retrieve various properties from this | |||||
# target, such as GENERATED_INCLUDES_DIR, BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR. | |||||
# | # | ||||
# additional_dependencies: A list of additional dependencies that you'd like | |||||
# all generated files to depend on. Pass in a blank string if you have none. | |||||
# additional_dependencies: A list of additional dependencies that you'd like all | |||||
# generated files to depend on. Pass in a blank string if you have none. | |||||
# | # | ||||
# generated_includes_dir: Where to generate the C++ header files for these | |||||
# schemas. The generated includes directory will automatically be added to | |||||
# CMake's include directories, and will be where generated header files are | |||||
# placed. This parameter is optional; pass in empty string if you don't want to | |||||
# generate include files for these schemas. | |||||
# generated_includes_dir: Where to generate the C++ header files for these schemas. The | |||||
# generated includes directory will automatically be added to CMake's include | |||||
# directories, and will be where generated header files are placed. This parameter is | |||||
# optional; pass in empty string if you don't want to generate include files for these | |||||
# schemas. | |||||
# | # | ||||
# binary_schemas_dir: If you specify an optional binary schema directory, binary | |||||
# schemas will be generated for these schemas as well, and placed into the given | |||||
# directory. | |||||
# binary_schemas_dir: If you specify an optional binary schema directory, binary schemas | |||||
# will be generated for these schemas as well, and placed into the given directory. | |||||
# | # | ||||
# copy_text_schemas_dir: If you want all text schemas (including schemas from | |||||
# all schema include directories) copied into a directory (for example, if you | |||||
# need them within your project to build JSON files), you can specify that | |||||
# folder here. All text schemas will be copied to that folder. | |||||
# copy_text_schemas_dir: If you want all text schemas (including schemas from all schema | |||||
# include directories) copied into a directory (for example, if you need them within | |||||
# your project to build JSON files), you can specify that folder here. All text schemas | |||||
# will be copied to that folder. | |||||
# | # | ||||
# IMPORTANT: Make sure you quote all list arguments you pass to this function! | |||||
# Otherwise CMake will only pass in the first element. | |||||
# Example: build_flatbuffers("${fb_files}" "${include_dirs}" target_name ...) | |||||
function(build_flatbuffers flatbuffers_schemas | |||||
schema_include_dirs | |||||
custom_target_name | |||||
additional_dependencies | |||||
generated_includes_dir | |||||
binary_schemas_dir | |||||
copy_text_schemas_dir) | |||||
# IMPORTANT: Make sure you quote all list arguments you pass to this function! Otherwise | |||||
# CMake will only pass in the first element. Example: build_flatbuffers("${fb_files}" | |||||
# "${include_dirs}" target_name ...) | |||||
function( | |||||
build_flatbuffers | |||||
flatbuffers_schemas | |||||
schema_include_dirs | |||||
custom_target_name | |||||
additional_dependencies | |||||
generated_includes_dir | |||||
binary_schemas_dir | |||||
copy_text_schemas_dir) | |||||
# Test if including from FindFlatBuffers | # Test if including from FindFlatBuffers | ||||
if(FLATBUFFERS_FLATC_EXECUTABLE) | if(FLATBUFFERS_FLATC_EXECUTABLE) | ||||
@@ -65,10 +62,7 @@ function(build_flatbuffers flatbuffers_schemas | |||||
endif() | endif() | ||||
set(FLATC_SCHEMA_ARGS --gen-mutable) | set(FLATC_SCHEMA_ARGS --gen-mutable) | ||||
if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS) | if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS) | ||||
set(FLATC_SCHEMA_ARGS | |||||
${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} | |||||
${FLATC_SCHEMA_ARGS} | |||||
) | |||||
set(FLATC_SCHEMA_ARGS ${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} ${FLATC_SCHEMA_ARGS}) | |||||
endif() | endif() | ||||
set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}") | set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}") | ||||
@@ -77,12 +71,12 @@ function(build_flatbuffers flatbuffers_schemas | |||||
# Generate the include files parameters. | # Generate the include files parameters. | ||||
set(include_params "") | set(include_params "") | ||||
set(all_generated_files "") | set(all_generated_files "") | ||||
foreach (include_dir ${schema_include_dirs}) | |||||
foreach(include_dir ${schema_include_dirs}) | |||||
set(include_params -I ${include_dir} ${include_params}) | set(include_params -I ${include_dir} ${include_params}) | ||||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
# Copy text schemas from dependent folders. | # Copy text schemas from dependent folders. | ||||
file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob}) | file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob}) | ||||
foreach (dependent_schema ${dependent_schemas}) | |||||
foreach(dependent_schema ${dependent_schemas}) | |||||
file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir}) | file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir}) | ||||
endforeach() | endforeach() | ||||
endif() | endif() | ||||
@@ -91,62 +85,54 @@ function(build_flatbuffers flatbuffers_schemas | |||||
foreach(schema ${flatbuffers_schemas}) | foreach(schema ${flatbuffers_schemas}) | ||||
get_filename_component(filename ${schema} NAME_WE) | get_filename_component(filename ${schema} NAME_WE) | ||||
# For each schema, do the things we requested. | # For each schema, do the things we requested. | ||||
if (NOT ${generated_includes_dir} STREQUAL "") | |||||
if(NOT ${generated_includes_dir} STREQUAL "") | |||||
set(generated_include ${generated_includes_dir}/${filename}_generated.h) | set(generated_include ${generated_includes_dir}/${filename}_generated.h) | ||||
add_custom_command( | add_custom_command( | ||||
OUTPUT ${generated_include} | OUTPUT ${generated_include} | ||||
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} | |||||
-o ${generated_includes_dir} | |||||
${include_params} | |||||
-c ${schema} | |||||
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} -o ${generated_includes_dir} | |||||
${include_params} -c ${schema} | |||||
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | ||||
WORKING_DIRECTORY "${working_dir}") | WORKING_DIRECTORY "${working_dir}") | ||||
list(APPEND all_generated_files ${generated_include}) | list(APPEND all_generated_files ${generated_include}) | ||||
endif() | endif() | ||||
if (NOT ${binary_schemas_dir} STREQUAL "") | |||||
if(NOT ${binary_schemas_dir} STREQUAL "") | |||||
set(binary_schema ${binary_schemas_dir}/${filename}.bfbs) | set(binary_schema ${binary_schemas_dir}/${filename}.bfbs) | ||||
add_custom_command( | add_custom_command( | ||||
OUTPUT ${binary_schema} | OUTPUT ${binary_schema} | ||||
COMMAND ${FLATC} -b --schema | |||||
-o ${binary_schemas_dir} | |||||
${include_params} | |||||
${schema} | |||||
COMMAND ${FLATC} -b --schema -o ${binary_schemas_dir} ${include_params} | |||||
${schema} | |||||
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies} | ||||
WORKING_DIRECTORY "${working_dir}") | WORKING_DIRECTORY "${working_dir}") | ||||
list(APPEND all_generated_files ${binary_schema}) | list(APPEND all_generated_files ${binary_schema}) | ||||
endif() | endif() | ||||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
file(COPY ${schema} DESTINATION ${copy_text_schemas_dir}) | file(COPY ${schema} DESTINATION ${copy_text_schemas_dir}) | ||||
endif() | endif() | ||||
endforeach() | endforeach() | ||||
# Create a custom target that depends on all the generated files. | |||||
# This is the target that you can depend on to trigger all these | |||||
# to be built. | |||||
add_custom_target(${custom_target_name} | |||||
DEPENDS ${all_generated_files} ${additional_dependencies}) | |||||
# Create a custom target that depends on all the generated files. This is the target | |||||
# that you can depend on to trigger all these to be built. | |||||
add_custom_target(${custom_target_name} DEPENDS ${all_generated_files} | |||||
${additional_dependencies}) | |||||
# Register the include directory we are using. | # Register the include directory we are using. | ||||
if (NOT ${generated_includes_dir} STREQUAL "") | |||||
if(NOT ${generated_includes_dir} STREQUAL "") | |||||
include_directories(${generated_includes_dir}) | include_directories(${generated_includes_dir}) | ||||
set_property(TARGET ${custom_target_name} | |||||
PROPERTY GENERATED_INCLUDES_DIR | |||||
${generated_includes_dir}) | |||||
set_property(TARGET ${custom_target_name} PROPERTY GENERATED_INCLUDES_DIR | |||||
${generated_includes_dir}) | |||||
endif() | endif() | ||||
# Register the binary schemas dir we are using. | # Register the binary schemas dir we are using. | ||||
if (NOT ${binary_schemas_dir} STREQUAL "") | |||||
set_property(TARGET ${custom_target_name} | |||||
PROPERTY BINARY_SCHEMAS_DIR | |||||
${binary_schemas_dir}) | |||||
if(NOT ${binary_schemas_dir} STREQUAL "") | |||||
set_property(TARGET ${custom_target_name} PROPERTY BINARY_SCHEMAS_DIR | |||||
${binary_schemas_dir}) | |||||
endif() | endif() | ||||
# Register the text schema copy dir we are using. | # Register the text schema copy dir we are using. | ||||
if (NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
set_property(TARGET ${custom_target_name} | |||||
PROPERTY COPY_TEXT_SCHEMAS_DIR | |||||
${copy_text_schemas_dir}) | |||||
if(NOT ${copy_text_schemas_dir} STREQUAL "") | |||||
set_property(TARGET ${custom_target_name} PROPERTY COPY_TEXT_SCHEMAS_DIR | |||||
${copy_text_schemas_dir}) | |||||
endif() | endif() | ||||
endfunction() | endfunction() |
@@ -1,49 +1,45 @@ | |||||
# Parses the version set in src/core/include/megbrain/version.h | |||||
# Exports the following variables: | |||||
# MGB_VER_MAJOR: Major version | |||||
# MGB_VER_MINOR: Minor version | |||||
# MGB_VER_PATCH: Patch version | |||||
# MGB_IS_DEV: Is development version | |||||
# MGB_VER_STRING: Version string | |||||
# Parses the version set in src/core/include/megbrain/version.h Exports the following | |||||
# variables: MGB_VER_MAJOR: Major version MGB_VER_MINOR: Minor version MGB_VER_PATCH: | |||||
# Patch version MGB_IS_DEV: Is development version MGB_VER_STRING: Version string | |||||
option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF) | option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF) | ||||
file (READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||||
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content) | |||||
string (REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||||
set (MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content}) | |||||
set(MGB_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||||
set (MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content}) | |||||
set(MGB_VER_MINOR ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||||
set (MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content}) | |||||
set(MGB_VER_PATCH ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||||
set (MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content}) | |||||
set(MGE_VER_MAJOR ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||||
set (MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content}) | |||||
set(MGE_VER_MINOR ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||||
set (MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content}) | |||||
set(MGE_VER_PATCH ${CMAKE_MATCH_1}) | |||||
string (REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||||
set (MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content}) | |||||
set(MGE_EXTRA_NAME ${CMAKE_MATCH_1}) | |||||
if (MGB_FORCE_DEV_VERSION) | |||||
set (MGB_IS_DEV 1) | |||||
if(MGB_FORCE_DEV_VERSION) | |||||
set(MGB_IS_DEV 1) | |||||
else() | else() | ||||
string (REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||||
set (MGB_IS_DEV ${CMAKE_MATCH_1}) | |||||
string(REGEX MATCH "MGB_IS_DEV +([01])" _ ${content}) | |||||
set(MGB_IS_DEV ${CMAKE_MATCH_1}) | |||||
endif() | endif() | ||||
if (DEFINED MGB_VER_MAJOR) | |||||
set (MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||||
if(DEFINED MGB_VER_MAJOR) | |||||
set(MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}") | |||||
else() | else() | ||||
set (MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||||
set(MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}") | |||||
endif(DEFINED MGB_VER_MAJOR) | endif(DEFINED MGB_VER_MAJOR) | ||||
if (MGB_IS_DEV) | |||||
set (MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||||
if(MGB_IS_DEV) | |||||
set(MGB_VER_STRING "${MGB_VER_STRING}-dev") | |||||
endif() | endif() | ||||
message(STATUS "Building MegBrain ${MGB_VER_STRING}") | message(STATUS "Building MegBrain ${MGB_VER_STRING}") |
@@ -2,31 +2,40 @@ | |||||
include(ExternalProject) | include(ExternalProject) | ||||
find_package(LLVM 6.0 REQUIRED CONFIG) | find_package(LLVM 6.0 REQUIRED CONFIG) | ||||
STRING(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||||
string(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION}) | |||||
list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR) | list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR) | ||||
list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR) | list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR) | ||||
set(HALIDE_DIR "${PROJECT_SOURCE_DIR}/third_party/Halide" CACHE STRING "halide directory") | |||||
set(HALIDE_DIR | |||||
"${PROJECT_SOURCE_DIR}/third_party/Halide" | |||||
CACHE STRING "halide directory") | |||||
set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide) | set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide) | ||||
set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a) | set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a) | ||||
ExternalProject_add( | |||||
halide | |||||
SOURCE_DIR ${HALIDE_DIR} | |||||
PREFIX ${HALIDE_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} -DWITH_APPS=OFF -DWITH_TESTS=OFF -DWITH_TUTORIALS=OFF -DHALIDE_SHARED_LIBRARY=OFF -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DTARGET_MIPS=OFF -DTARGET_POWERPC=OFF | |||||
BUILD_BYPRODUCTS ${HALIDE_LIB} | |||||
) | |||||
ExternalProject_Add( | |||||
halide | |||||
SOURCE_DIR ${HALIDE_DIR} | |||||
PREFIX ${HALIDE_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
-DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} | |||||
-DWITH_APPS=OFF | |||||
-DWITH_TESTS=OFF | |||||
-DWITH_TUTORIALS=OFF | |||||
-DHALIDE_SHARED_LIBRARY=OFF | |||||
-DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} | |||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
-DTARGET_MIPS=OFF | |||||
-DTARGET_POWERPC=OFF | |||||
BUILD_BYPRODUCTS ${HALIDE_LIB}) | |||||
set(HALIDE_INC ${HALIDE_BUILD_DIR}/include) | set(HALIDE_INC ${HALIDE_BUILD_DIR}/include) | ||||
file(MAKE_DIRECTORY ${HALIDE_INC}) | file(MAKE_DIRECTORY ${HALIDE_INC}) | ||||
add_library(libhalide STATIC IMPORTED GLOBAL) | add_library(libhalide STATIC IMPORTED GLOBAL) | ||||
add_dependencies(libhalide halide) | add_dependencies(libhalide halide) | ||||
set_target_properties( | |||||
libhalide PROPERTIES | |||||
IMPORTED_LOCATION ${HALIDE_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC} | |||||
) | |||||
set_target_properties(libhalide PROPERTIES IMPORTED_LOCATION ${HALIDE_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC}) | |||||
set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU) | set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU) | ||||
llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS}) | llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS}) | ||||
@@ -1,25 +1,31 @@ | |||||
if (MGE_USE_SYSTEM_LIB) | |||||
find_package(dnnl) | |||||
if (dnnl_FOUND) | |||||
message(STATUS "Using system provided MKL-DNN.") | |||||
set (MGE_USE_SYSTEM_MKLDNN ON) | |||||
return() | |||||
endif() | |||||
if(MGE_USE_SYSTEM_LIB) | |||||
find_package(dnnl) | |||||
if(dnnl_FOUND) | |||||
message(STATUS "Using system provided MKL-DNN.") | |||||
set(MGE_USE_SYSTEM_MKLDNN ON) | |||||
return() | |||||
endif() | |||||
endif() | endif() | ||||
option(DNNL_BUILD_TESTS "" OFF) | option(DNNL_BUILD_TESTS "" OFF) | ||||
option(DNNL_BUILD_EXAMPLES "" OFF) | option(DNNL_BUILD_EXAMPLES "" OFF) | ||||
# we do not want to use OMP now, so config to CPU mode | |||||
# if set to OMP, some dnnl algo will be more fast | |||||
set(DNNL_CPU_RUNTIME "SEQ" CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||||
# we do not want to use OMP now, so config to CPU mode if set to OMP, some dnnl algo | |||||
# will be more fast | |||||
set(DNNL_CPU_RUNTIME | |||||
"SEQ" | |||||
CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ") | |||||
if(MGE_BLAS STREQUAL "MKL") | if(MGE_BLAS STREQUAL "MKL") | ||||
option(_DNNL_USE_MKL "" ON) | |||||
set(MKLROOT ${MKL_ROOT_DIR} CACHE STRING "MKL ROOT FOR DNNL") | |||||
set(MKLLIB libmkl) | |||||
option(_DNNL_USE_MKL "" ON) | |||||
set(MKLROOT | |||||
${MKL_ROOT_DIR} | |||||
CACHE STRING "MKL ROOT FOR DNNL") | |||||
set(MKLLIB libmkl) | |||||
else() | else() | ||||
option(_DNNL_USE_MKL "" OFF) | |||||
option(_DNNL_USE_MKL "" OFF) | |||||
endif() | endif() | ||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra") | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra") | ||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra") | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra") | ||||
set(DNNL_LIBRARY_TYPE STATIC CACHE STRING "config dnnl to STATIC") | |||||
set(DNNL_LIBRARY_TYPE | |||||
STATIC | |||||
CACHE STRING "config dnnl to STATIC") | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn) | add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn) |
@@ -1,30 +1,28 @@ | |||||
# - Find the NumPy libraries | |||||
# This module finds if NumPy is installed, and sets the following variables | |||||
# indicating where it is. | |||||
# * Find the NumPy libraries This module finds if NumPy is installed, and sets the | |||||
# following variables indicating where it is. | |||||
# | # | ||||
# TODO: Update to provide the libraries and paths for linking npymath lib. | # TODO: Update to provide the libraries and paths for linking npymath lib. | ||||
# | # | ||||
# NUMPY_FOUND - was NumPy found | |||||
# NUMPY_VERSION - the version of NumPy found as a string | |||||
# NUMPY_VERSION_MAJOR - the major version number of NumPy | |||||
# NUMPY_VERSION_MINOR - the minor version number of NumPy | |||||
# NUMPY_VERSION_PATCH - the patch version number of NumPy | |||||
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 | |||||
# NUMPY_INCLUDE_DIR - path to the NumPy include files | |||||
# NUMPY_FOUND - was NumPy found NUMPY_VERSION - the version of | |||||
# NumPy found as a string NUMPY_VERSION_MAJOR - the major version number of NumPy | |||||
# NUMPY_VERSION_MINOR - the minor version number of NumPy NUMPY_VERSION_PATCH - | |||||
# the patch version number of NumPy NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is | |||||
# 10601 NUMPY_INCLUDE_DIR - path to the NumPy include files | |||||
unset(NUMPY_VERSION) | unset(NUMPY_VERSION) | ||||
unset(NUMPY_INCLUDE_DIR) | unset(NUMPY_INCLUDE_DIR) | ||||
if(PYTHONINTERP_FOUND) | if(PYTHONINTERP_FOUND) | ||||
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||||
"import numpy as n; print(n.__version__); print(n.get_include());" | |||||
execute_process( | |||||
COMMAND "${PYTHON_EXECUTABLE}" "-c" | |||||
"import numpy as n; print(n.__version__); print(n.get_include());" | |||||
RESULT_VARIABLE __result | RESULT_VARIABLE __result | ||||
OUTPUT_VARIABLE __output | OUTPUT_VARIABLE __output | ||||
OUTPUT_STRIP_TRAILING_WHITESPACE) | OUTPUT_STRIP_TRAILING_WHITESPACE) | ||||
if(__result MATCHES 0) | if(__result MATCHES 0) | ||||
string(REGEX REPLACE ";" "\\\\;" __values ${__output}) | string(REGEX REPLACE ";" "\\\\;" __values ${__output}) | ||||
string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||||
string(REGEX REPLACE "\r?\n" ";" __values ${__values}) | |||||
list(GET __values 0 NUMPY_VERSION) | list(GET __values 0 NUMPY_VERSION) | ||||
list(GET __values 1 NUMPY_INCLUDE_DIR) | list(GET __values 1 NUMPY_INCLUDE_DIR) | ||||
@@ -33,13 +31,18 @@ if(PYTHONINTERP_FOUND) | |||||
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) | set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1}) | ||||
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) | set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2}) | ||||
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) | set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3}) | ||||
math(EXPR NUMPY_VERSION_DECIMAL | |||||
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") | |||||
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||||
math( | |||||
EXPR | |||||
NUMPY_VERSION_DECIMAL | |||||
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}" | |||||
) | |||||
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR}) | |||||
else() | else() | ||||
unset(NUMPY_VERSION) | |||||
unset(NUMPY_INCLUDE_DIR) | |||||
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n") | |||||
unset(NUMPY_VERSION) | |||||
unset(NUMPY_INCLUDE_DIR) | |||||
message( | |||||
STATUS | |||||
"Requested NumPy version and include path, but got instead:\n${__output}\n") | |||||
endif() | endif() | ||||
endif() | endif() | ||||
else() | else() | ||||
@@ -47,8 +50,10 @@ else() | |||||
endif() | endif() | ||||
include(FindPackageHandleStandardArgs) | include(FindPackageHandleStandardArgs) | ||||
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||||
VERSION_VAR NUMPY_VERSION) | |||||
find_package_handle_standard_args( | |||||
NumPy | |||||
REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION | |||||
VERSION_VAR NUMPY_VERSION) | |||||
if(NUMPY_FOUND) | if(NUMPY_FOUND) | ||||
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") | message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})") | ||||
@@ -1,48 +1,50 @@ | |||||
if (MGE_USE_SYSTEM_LIB) | |||||
find_package(OpenBLAS) | |||||
set (MGE_USE_SYSTEM_OPENBLAS ON) | |||||
message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||||
add_library(libopenblas IMPORTED GLOBAL) | |||||
set_target_properties( | |||||
libopenblas PROPERTIES | |||||
IMPORTED_LOCATION ${OpenBLAS_LIBRARIES} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS} | |||||
) | |||||
return() | |||||
if(MGE_USE_SYSTEM_LIB) | |||||
find_package(OpenBLAS) | |||||
set(MGE_USE_SYSTEM_OPENBLAS ON) | |||||
message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}") | |||||
add_library(libopenblas IMPORTED GLOBAL) | |||||
set_target_properties( | |||||
libopenblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_LIBRARIES} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS}) | |||||
return() | |||||
endif() | endif() | ||||
include(ExternalProject) | include(ExternalProject) | ||||
include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
set(OPENBLAS_DIR "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" CACHE STRING "OpenBLAS directory") | |||||
set(OPENBLAS_DIR | |||||
"${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" | |||||
CACHE STRING "OpenBLAS directory") | |||||
set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS) | set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS) | ||||
set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include) | set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include) | ||||
set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a) | set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a) | ||||
if(${CMAKE_GENERATOR} STREQUAL "Ninja") | if(${CMAKE_GENERATOR} STREQUAL "Ninja") | ||||
set(MAKE_COMMAND make) | |||||
set(MAKE_COMMAND make) | |||||
else() | else() | ||||
set(MAKE_COMMAND "$(MAKE)") | |||||
set(MAKE_COMMAND "$(MAKE)") | |||||
endif() | endif() | ||||
ExternalProject_add( | |||||
openblas | |||||
SOURCE_DIR ${OPENBLAS_DIR} | |||||
PREFIX ${OPENBLAS_BUILD_DIR} | |||||
CMAKE_GENERATOR "Unix Makefiles" | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
BUILD_COMMAND ${MAKE_COMMAND} | |||||
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE} | |||||
) | |||||
ExternalProject_Add( | |||||
openblas | |||||
SOURCE_DIR ${OPENBLAS_DIR} | |||||
PREFIX ${OPENBLAS_BUILD_DIR} | |||||
CMAKE_GENERATOR "Unix Makefiles" | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
-DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} | |||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
BUILD_COMMAND ${MAKE_COMMAND} | |||||
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE}) | |||||
file(MAKE_DIRECTORY ${OPENBLAS_INC}) | file(MAKE_DIRECTORY ${OPENBLAS_INC}) | ||||
add_library(libopenblas STATIC IMPORTED GLOBAL) | add_library(libopenblas STATIC IMPORTED GLOBAL) | ||||
add_dependencies(libopenblas openblas) | add_dependencies(libopenblas openblas) | ||||
set_target_properties( | set_target_properties( | ||||
libopenblas PROPERTIES | |||||
IMPORTED_LOCATION ${OPENBLAS_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include | |||||
) | |||||
libopenblas PROPERTIES IMPORTED_LOCATION ${OPENBLAS_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include) |
@@ -1,31 +1,31 @@ | |||||
find_library(ACLRT_LIBRARY | |||||
NAMES libascendcl.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES stub | |||||
DOC "ACL library." ) | |||||
find_library( | |||||
ACLRT_LIBRARY | |||||
NAMES libascendcl.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES stub | |||||
DOC "ACL library.") | |||||
if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND") | if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find ACLRT Library") | |||||
message(FATAL_ERROR "Can not find ACLRT Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH) | get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH) | ||||
find_path(ACLRT_INCLUDE_DIR | |||||
NAMES acl/acl.h | |||||
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to ACLRT include directory." ) | |||||
find_path( | |||||
ACLRT_INCLUDE_DIR | |||||
NAMES acl/acl.h | |||||
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to ACLRT include directory.") | |||||
if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND") | if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find ACLRT Library") | |||||
message(FATAL_ERROR "Can not find ACLRT Library") | |||||
endif() | endif() | ||||
add_library(libascendcl SHARED IMPORTED) | add_library(libascendcl SHARED IMPORTED) | ||||
set_target_properties(libascendcl PROPERTIES | |||||
IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR} | |||||
) | |||||
set_target_properties( | |||||
libascendcl PROPERTIES IMPORTED_LOCATION ${ACLRT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR}) | |||||
message(STATUS "Found ACLRT: ${__found_aclrt_root}") | message(STATUS "Found ACLRT: ${__found_aclrt_root}") | ||||
@@ -1,44 +1,57 @@ | |||||
find_library(CNDEV_LIBRARY | |||||
NAMES libcndev.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNDEV library." ) | |||||
find_library( | |||||
CNDEV_LIBRARY | |||||
NAMES libcndev.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNDEV library.") | |||||
if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND") | if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNDEV Library") | |||||
message(FATAL_ERROR "Can not find CNDEV Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH) | get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH) | ||||
find_path(CNDEV_INCLUDE_DIR | |||||
NAMES cndev.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNDEV include directory." ) | |||||
find_path( | |||||
CNDEV_INCLUDE_DIR | |||||
NAMES cndev.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNDEV include directory.") | |||||
if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND") | if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNDEV Library") | |||||
message(FATAL_ERROR "Can not find CNDEV Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 "${CNDEV_1}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 "${CNDEV_2}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 "${CNDEV_3}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 "${CNDEV_4}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 "${CNDEV_5}") | |||||
set(CNDEV_VERSION_STRING "${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 | |||||
REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 | |||||
REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 | |||||
REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 | |||||
REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$") | |||||
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 | |||||
REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 | |||||
"${CNDEV_1}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 | |||||
"${CNDEV_2}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 | |||||
"${CNDEV_3}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 | |||||
"${CNDEV_4}") | |||||
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 | |||||
"${CNDEV_5}") | |||||
set(CNDEV_VERSION_STRING | |||||
"${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}" | |||||
) | |||||
add_library(libcndev SHARED IMPORTED) | add_library(libcndev SHARED IMPORTED) | ||||
set_target_properties(libcndev PROPERTIES | |||||
IMPORTED_LOCATION ${CNDEV_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNDEV_INCLUDE_DIR} | |||||
) | |||||
message(STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") | |||||
set_target_properties( | |||||
libcndev PROPERTIES IMPORTED_LOCATION ${CNDEV_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${CNDEV_INCLUDE_DIR}) | |||||
message( | |||||
STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})") |
@@ -1,40 +1,49 @@ | |||||
find_library(CNLIGHT_LIBRARY | |||||
NAMES libcnlight.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNLIGHT library." ) | |||||
find_library( | |||||
CNLIGHT_LIBRARY | |||||
NAMES libcnlight.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNLIGHT library.") | |||||
if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") | if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH) | ||||
find_path(CNLIGHT_INCLUDE_DIR | |||||
NAMES cnlight.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNLIGHT include directory." ) | |||||
find_path( | |||||
CNLIGHT_INCLUDE_DIR | |||||
NAMES cnlight.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNLIGHT include directory.") | |||||
if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") | if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
message(FATAL_ERROR "Can not find CNLIGHT Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||||
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||||
set(CNLIGHT_VERSION_STRING "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR | |||||
REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR | |||||
REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH | |||||
REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||||
CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" | |||||
CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") | |||||
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" | |||||
CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") | |||||
set(CNLIGHT_VERSION_STRING | |||||
"${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") | |||||
add_library(libcnlight SHARED IMPORTED) | add_library(libcnlight SHARED IMPORTED) | ||||
set_target_properties(libcnlight PROPERTIES | |||||
IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR} | |||||
) | |||||
message(STATUS "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") | |||||
set_target_properties( | |||||
libcnlight PROPERTIES IMPORTED_LOCATION ${CNLIGHT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR}) | |||||
message( | |||||
STATUS | |||||
"Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") |
@@ -1,40 +1,48 @@ | |||||
find_library(CNML_LIBRARY | |||||
NAMES libcnml.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNML library." ) | |||||
find_library( | |||||
CNML_LIBRARY | |||||
NAMES libcnml.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNML library.") | |||||
if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND") | if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNML Library") | |||||
message(FATAL_ERROR "Can not find CNML Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH) | ||||
find_path(CNML_INCLUDE_DIR | |||||
NAMES cnml.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNML include directory." ) | |||||
find_path( | |||||
CNML_INCLUDE_DIR | |||||
NAMES cnml.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNML include directory.") | |||||
if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND") | if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNML Library") | |||||
message(FATAL_ERROR "Can not find CNML Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR "${CNML_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR "${CNML_MINOR}") | |||||
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH "${CNML_PATCH}") | |||||
set(CNML_VERSION_STRING "${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR | |||||
REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR | |||||
REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH | |||||
REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR | |||||
"${CNML_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR | |||||
"${CNML_MINOR}") | |||||
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH | |||||
"${CNML_PATCH}") | |||||
set(CNML_VERSION_STRING | |||||
"${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}") | |||||
add_library(libcnml SHARED IMPORTED) | add_library(libcnml SHARED IMPORTED) | ||||
set_target_properties(libcnml PROPERTIES | |||||
IMPORTED_LOCATION ${CNML_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNML_INCLUDE_DIR} | |||||
) | |||||
message(STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") | |||||
set_target_properties( | |||||
libcnml PROPERTIES IMPORTED_LOCATION ${CNML_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${CNML_INCLUDE_DIR}) | |||||
message( | |||||
STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})") |
@@ -1,80 +1,100 @@ | |||||
find_library(CNNL_LIBRARY | |||||
NAMES libcnnl.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNNL library." ) | |||||
find_library( | |||||
CNNL_LIBRARY | |||||
NAMES libcnnl.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNNL library.") | |||||
if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") | if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNNL Library") | |||||
message(FATAL_ERROR "Can not find CNNL Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH) | ||||
find_path(CNNL_INCLUDE_DIR | |||||
NAMES cnnl.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNNL include directory." ) | |||||
find_path( | |||||
CNNL_INCLUDE_DIR | |||||
NAMES cnnl.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNNL include directory.") | |||||
if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") | if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNNL Library") | |||||
message(FATAL_ERROR "Can not find CNNL Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR "${CNNL_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR "${CNNL_MINOR}") | |||||
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH "${CNNL_PATCH}") | |||||
set(CNNL_VERSION_STRING "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR | |||||
REGEX "^#define CNNL_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR | |||||
REGEX "^#define CNNL_MINOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH | |||||
REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR | |||||
"${CNNL_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR | |||||
"${CNNL_MINOR}") | |||||
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH | |||||
"${CNNL_PATCH}") | |||||
set(CNNL_VERSION_STRING | |||||
"${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") | |||||
add_library(libcnnl SHARED IMPORTED) | add_library(libcnnl SHARED IMPORTED) | ||||
set_target_properties(libcnnl PROPERTIES | |||||
IMPORTED_LOCATION ${CNNL_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_INCLUDE_DIR} | |||||
) | |||||
set_target_properties( | |||||
libcnnl PROPERTIES IMPORTED_LOCATION ${CNNL_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${CNNL_INCLUDE_DIR}) | |||||
message(STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||||
message( | |||||
STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") | |||||
find_library(CNNL_EXTRA_LIBRARY | |||||
NAMES libcnnl_extra.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNNL_EXTRA library." ) | |||||
find_library( | |||||
CNNL_EXTRA_LIBRARY | |||||
NAMES libcnnl_extra.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNNL_EXTRA library.") | |||||
if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") | if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH) | get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH) | ||||
find_path(CNNL_EXTRA_INCLUDE_DIR | |||||
NAMES cnnl_extra.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNNL_EXTRA include directory." ) | |||||
find_path( | |||||
CNNL_EXTRA_INCLUDE_DIR | |||||
NAMES cnnl_extra.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNNL_EXTRA include directory.") | |||||
if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") | if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
message(FATAL_ERROR "Can not find CNNL_EXTRA Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||||
set(CNNL_EXTRA_VERSION_STRING "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}") | |||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR | |||||
REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR | |||||
REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") | |||||
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH | |||||
REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" | |||||
CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" | |||||
CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") | |||||
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" | |||||
CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") | |||||
set(CNNL_EXTRA_VERSION_STRING | |||||
"${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}" | |||||
) | |||||
add_library(libcnnl_extra SHARED IMPORTED) | add_library(libcnnl_extra SHARED IMPORTED) | ||||
set_target_properties(libcnnl_extra PROPERTIES | |||||
IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR} | |||||
) | |||||
message(STATUS "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})") | |||||
set_target_properties( | |||||
libcnnl_extra PROPERTIES IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR}) | |||||
message( | |||||
STATUS | |||||
"Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})" | |||||
) |
@@ -1,40 +1,48 @@ | |||||
find_library(CNRT_LIBRARY | |||||
NAMES libcnrt.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNRT library." ) | |||||
find_library( | |||||
CNRT_LIBRARY | |||||
NAMES libcnrt.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CNRT library.") | |||||
if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND") | if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNRT Library") | |||||
message(FATAL_ERROR "Can not find CNRT Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH) | get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH) | ||||
find_path(CNRT_INCLUDE_DIR | |||||
NAMES cnrt.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNRT include directory." ) | |||||
find_path( | |||||
CNRT_INCLUDE_DIR | |||||
NAMES cnrt.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CNRT include directory.") | |||||
if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND") | if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CNRT Library") | |||||
message(FATAL_ERROR "Can not find CNRT Library") | |||||
endif() | endif() | ||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR "${CNRT_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR "${CNRT_MINOR}") | |||||
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH "${CNRT_PATCH}") | |||||
set(CNRT_VERSION_STRING "${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR | |||||
REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR | |||||
REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH | |||||
REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR | |||||
"${CNRT_MAJOR}") | |||||
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR | |||||
"${CNRT_MINOR}") | |||||
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH | |||||
"${CNRT_PATCH}") | |||||
set(CNRT_VERSION_STRING | |||||
"${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}") | |||||
add_library(libcnrt SHARED IMPORTED) | add_library(libcnrt SHARED IMPORTED) | ||||
set_target_properties(libcnrt PROPERTIES | |||||
IMPORTED_LOCATION ${CNRT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CNRT_INCLUDE_DIR} | |||||
) | |||||
message(STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") | |||||
set_target_properties( | |||||
libcnrt PROPERTIES IMPORTED_LOCATION ${CNRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${CNRT_INCLUDE_DIR}) | |||||
message( | |||||
STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})") |
@@ -1,2 +1,5 @@ | |||||
file(GLOB_RECURSE CPP_REDIS_SRCS ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp ${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||||
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes ${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) | |||||
file(GLOB_RECURSE CPP_REDIS_SRCS | |||||
${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp | |||||
${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp) | |||||
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes | |||||
${PROJECT_SOURCE_DIR}/third_party/tacopie/includes) |
@@ -1,20 +1,20 @@ | |||||
if (MGE_USE_SYSTEM_LIB) | |||||
find_package(Cpuinfo) | |||||
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||||
add_library(libcpuinfo IMPORTED GLOBAL) | |||||
set_target_properties( | |||||
libcpuinfo PROPERTIES | |||||
IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS} | |||||
) | |||||
return() | |||||
if(MGE_USE_SYSTEM_LIB) | |||||
find_package(Cpuinfo) | |||||
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}") | |||||
add_library(libcpuinfo IMPORTED GLOBAL) | |||||
set_target_properties( | |||||
libcpuinfo PROPERTIES IMPORTED_LOCATION ${cpuinfo_LIBRARIES} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS}) | |||||
return() | |||||
endif() | endif() | ||||
SET(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||||
OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||||
OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||||
OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||||
OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||||
set(CPUINFO_LIBRARY_TYPE | |||||
"static" | |||||
CACHE STRING "Type of cpuinfo library (shared, static, or default) to build") | |||||
option(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF) | |||||
option(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF) | |||||
option(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF) | |||||
option(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF) | |||||
include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include") | include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include") | ||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo | |||||
${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL) |
@@ -1,73 +1,83 @@ | |||||
find_package(PkgConfig) | find_package(PkgConfig) | ||||
if(${PkgConfig_FOUND}) | if(${PkgConfig_FOUND}) | ||||
pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||||
pkg_check_modules(PC_CUDNN QUIET CUDNN) | |||||
endif() | endif() | ||||
if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||||
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||||
if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "") | |||||
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR}) | |||||
endif() | endif() | ||||
if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED) | if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED) | ||||
find_library(CUDNN_LIBRARY | |||||
NAMES libcudnn_static.a cudnn.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CUDNN library." ) | |||||
find_library( | |||||
CUDNN_LIBRARY | |||||
NAMES libcudnn_static.a cudnn.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||||
${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CUDNN library.") | |||||
else() | else() | ||||
find_library(CUDNN_LIBRARY | |||||
NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CUDNN library." ) | |||||
find_library( | |||||
CUDNN_LIBRARY | |||||
NAMES libcudnn.so libcudnn.dylib cudnn64.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} | |||||
${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "CUDNN library.") | |||||
endif() | endif() | ||||
if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND") | if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||||
) | |||||
endif() | endif() | ||||
get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH) | get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH) | ||||
find_path(CUDNN_INCLUDE_DIR | |||||
NAMES cudnn.h | |||||
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CUDNN include directory." ) | |||||
find_path( | |||||
CUDNN_INCLUDE_DIR | |||||
NAMES cudnn.h | |||||
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} | |||||
${__found_cudnn_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to CUDNN include directory.") | |||||
if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND") | if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env" | |||||
) | |||||
endif() | endif() | ||||
if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h) | if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h) | ||||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS) | |||||
else() | else() | ||||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||||
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) | |||||
endif() | endif() | ||||
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" | |||||
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" | |||||
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") | |||||
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" | |||||
CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" | |||||
CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") | |||||
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" | |||||
CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" | |||||
CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") | |||||
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" CUDNN_MAJOR_VERSION | |||||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" CUDNN_MAJOR_VERSION | |||||
"${CUDNN_MAJOR_VERSION}") | |||||
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" CUDNN_MINOR_VERSION | |||||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" CUDNN_MINOR_VERSION | |||||
"${CUDNN_MINOR_VERSION}") | |||||
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" CUDNN_PATCH_VERSION | |||||
"${CUDNN_VERSION_FILE_CONTENTS}") | |||||
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" CUDNN_PATCH_VERSION | |||||
"${CUDNN_PATCH_VERSION}") | |||||
set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}) | set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION}) | ||||
if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
add_library(libcudnn STATIC IMPORTED) | |||||
add_library(libcudnn STATIC IMPORTED) | |||||
else() | else() | ||||
add_library(libcudnn SHARED IMPORTED) | |||||
add_library(libcudnn SHARED IMPORTED) | |||||
endif() | endif() | ||||
set_target_properties(libcudnn PROPERTIES | |||||
IMPORTED_LOCATION ${CUDNN_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR}) | |||||
set_target_properties( | |||||
libcudnn PROPERTIES IMPORTED_LOCATION ${CUDNN_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${CUDNN_INCLUDE_DIR}) | |||||
message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})") | message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})") |
@@ -1,27 +1,47 @@ | |||||
if (MGE_USE_SYSTEM_LIB) | |||||
find_package(Flatbuffers REQUIRED) | |||||
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||||
include(cmake/BuildFlatBuffers.cmake) | |||||
return() | |||||
if(MGE_USE_SYSTEM_LIB) | |||||
find_package(Flatbuffers REQUIRED) | |||||
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}") | |||||
include(cmake/BuildFlatBuffers.cmake) | |||||
return() | |||||
endif() | endif() | ||||
if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
message(DEBUG "add flags flatc for clang-cl build") | |||||
set(FLATC_FLAGS "") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal") | |||||
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||||
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override") | |||||
endif() | |||||
message(DEBUG "add flags flatc for clang-cl build") | |||||
set(FLATC_FLAGS "") | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors" | |||||
) | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal" | |||||
) | |||||
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0") | |||||
set(FLATC_FLAGS | |||||
"${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override" | |||||
) | |||||
endif() | |||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}") | |||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}") | |||||
endif() | endif() | ||||
option(FLATBUFFERS_BUILD_TESTS "" OFF) | option(FLATBUFFERS_BUILD_TESTS "" OFF) | ||||
@@ -1 +1,2 @@ | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags) | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags | |||||
${CMAKE_CURRENT_BINARY_DIR}/gflags) |
@@ -1,2 +1,2 @@ | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) | |||||
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest | |||||
${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL) |
@@ -1,88 +1,136 @@ | |||||
# - Find the llvm/mlir libraries | |||||
# This module finds if llvm/mlir is installed, or build llvm/mlir from source. | |||||
# This module sets the following variables. | |||||
# * Find the llvm/mlir libraries This module finds if llvm/mlir is installed, or build | |||||
# llvm/mlir from source. This module sets the following variables. | |||||
# | # | ||||
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files | |||||
# MLIR_LLVM_LIBS - path to the LLVM/MLIR libraries | |||||
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files MLIR_LLVM_LIBS - path | |||||
# to the LLVM/MLIR libraries | |||||
# | # | ||||
# This module define the following functions. | # This module define the following functions. | ||||
# | # | ||||
# external_tablegen_library - created interface library which depends on tablegen outputs | |||||
# external_tablegen_library - created interface library which depends on tablegen | |||||
# outputs | |||||
include(CMakeParseArguments) | include(CMakeParseArguments) | ||||
function(external_tablegen_library) | function(external_tablegen_library) | ||||
cmake_parse_arguments( | |||||
_RULE | |||||
"TESTONLY" | |||||
"NAME;TBLGEN" | |||||
"SRCS;INCLUDES;OUTS" | |||||
${ARGN} | |||||
) | |||||
cmake_parse_arguments(_RULE "TESTONLY" "NAME;TBLGEN" "SRCS;INCLUDES;OUTS" ${ARGN}) | |||||
if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||||
return() | |||||
endif() | |||||
if(_RULE_TESTONLY AND NOT MGE_WITH_TEST) | |||||
return() | |||||
endif() | |||||
set(_NAME ${_RULE_NAME}) | |||||
set(_NAME ${_RULE_NAME}) | |||||
set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||||
set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||||
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||||
set(_OUTPUTS) | |||||
while(_RULE_OUTS) | |||||
list(GET _RULE_OUTS 0 _COMMAND) | |||||
list(REMOVE_AT _RULE_OUTS 0) | |||||
list(GET _RULE_OUTS 0 _FILE) | |||||
list(REMOVE_AT _RULE_OUTS 0) | |||||
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||||
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||||
endwhile() | |||||
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||||
set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS}) | |||||
set(_INCLUDE_DIRS ${_RULE_INCLUDES}) | |||||
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I") | |||||
set(_OUTPUTS) | |||||
while(_RULE_OUTS) | |||||
list(GET _RULE_OUTS 0 _COMMAND) | |||||
list(REMOVE_AT _RULE_OUTS 0) | |||||
list(GET _RULE_OUTS 0 _FILE) | |||||
list(REMOVE_AT _RULE_OUTS 0) | |||||
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS}) | |||||
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE}) | |||||
endwhile() | |||||
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS}) | |||||
add_library(${_NAME} INTERFACE) | |||||
add_dependencies(${_NAME} ${_NAME}_target) | |||||
add_library(${_NAME} INTERFACE) | |||||
add_dependencies(${_NAME} ${_NAME}_target) | |||||
target_include_directories(${_NAME} INTERFACE | |||||
"$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||||
target_include_directories(${_NAME} INTERFACE "$<BUILD_INTERFACE:${_RULE_INCLUDES}>") | |||||
install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||||
install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS}) | |||||
endfunction() | endfunction() | ||||
set(LLVM_LIBS LLVMCore LLVMSupport LLVMX86CodeGen LLVMOrcJIT LLVMNVPTXCodeGen LLVMNVPTXDesc LLVMNVPTXInfo) | |||||
set(MLIR_CORE_LIBS MLIRAnalysis MLIRExecutionEngine MLIRIR MLIRParser MLIRPass MLIRSideEffectInterfaces MLIRTransforms) | |||||
set(MLIR_DIALECT_LIBS MLIRAsync MLIRAVX512 MLIRGPU MLIRLLVMAVX512 MLIRNVVMIR MLIROpenACC MLIRPDL MLIRPDLInterp MLIRQuant MLIRROCDLIR MLIRSDBM MLIRShape MLIRSPIRV MLIRStandardOpsTransforms MLIRTosa) | |||||
set(MLIR_CONVERSION_LIBS MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||||
set(LLVM_LIBS | |||||
LLVMCore | |||||
LLVMSupport | |||||
LLVMX86CodeGen | |||||
LLVMOrcJIT | |||||
LLVMNVPTXCodeGen | |||||
LLVMNVPTXDesc | |||||
LLVMNVPTXInfo) | |||||
set(MLIR_CORE_LIBS | |||||
MLIRAnalysis | |||||
MLIRExecutionEngine | |||||
MLIRIR | |||||
MLIRParser | |||||
MLIRPass | |||||
MLIRSideEffectInterfaces | |||||
MLIRTransforms) | |||||
set(MLIR_DIALECT_LIBS | |||||
MLIRAsync | |||||
MLIRAVX512 | |||||
MLIRGPU | |||||
MLIRLLVMAVX512 | |||||
MLIRNVVMIR | |||||
MLIROpenACC | |||||
MLIRPDL | |||||
MLIRPDLInterp | |||||
MLIRQuant | |||||
MLIRROCDLIR | |||||
MLIRSDBM | |||||
MLIRShape | |||||
MLIRSPIRV | |||||
MLIRStandardOpsTransforms | |||||
MLIRTosa) | |||||
set(MLIR_CONVERSION_LIBS | |||||
MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms | |||||
MLIRGPUToNVVMTransforms MLIRSCFToStandard) | |||||
set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR) | set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR) | ||||
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} ${MLIR_TRANSLATION_LIBS}) | |||||
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} | |||||
${MLIR_TRANSLATION_LIBS}) | |||||
set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS}) | set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS}) | ||||
function(add_mge_mlir_src_dep llvm_monorepo_path) | function(add_mge_mlir_src_dep llvm_monorepo_path) | ||||
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||||
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||||
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||||
set(CMAKE_BUILD_TYPE "Debug") | |||||
endif() | |||||
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE) | |||||
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}") | |||||
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) | |||||
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES | |||||
"^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") | |||||
set(CMAKE_BUILD_TYPE "Debug") | |||||
endif() | |||||
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) | |||||
set(BUILD_SHARED_LIBS | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||||
add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL) | |||||
# Reset CMAKE_BUILD_TYPE to its previous setting | |||||
set(CMAKE_BUILD_TYPE "${_CMAKE_BUILD_TYPE}" CACHE STRING "Build type" FORCE) | |||||
# Reset BUILD_SHARED_LIBS to its previous setting | |||||
set(BUILD_SHARED_LIBS ${_CMAKE_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libraries" FORCE) | |||||
# Reset CMAKE_BUILD_TYPE to its previous setting | |||||
set(CMAKE_BUILD_TYPE | |||||
"${_CMAKE_BUILD_TYPE}" | |||||
CACHE STRING "Build type" FORCE) | |||||
# Reset BUILD_SHARED_LIBS to its previous setting | |||||
set(BUILD_SHARED_LIBS | |||||
${_CMAKE_BUILD_SHARED_LIBS} | |||||
CACHE BOOL "Build shared libraries" FORCE) | |||||
endfunction() | endfunction() | ||||
# llvm build options | # llvm build options | ||||
set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "" FORCE) | |||||
set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "" FORCE) | |||||
set(LLVM_ENABLE_RTTI ${MGE_ENABLE_RTTI} CACHE BOOL "" FORCE) | |||||
set(LLVM_TARGETS_TO_BUILD "X86;NVPTX;AArch64;ARM" CACHE STRING "" FORCE) | |||||
set(LLVM_ENABLE_PROJECTS "mlir" CACHE STRING "" FORCE) | |||||
set(LLVM_INCLUDE_EXAMPLES | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_TESTS | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_DOCS | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_ENABLE_BINDINGS | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_INCLUDE_BENCHMARKS | |||||
OFF | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_ENABLE_RTTI | |||||
${MGE_ENABLE_RTTI} | |||||
CACHE BOOL "" FORCE) | |||||
set(LLVM_TARGETS_TO_BUILD | |||||
"X86;NVPTX;AArch64;ARM" | |||||
CACHE STRING "" FORCE) | |||||
set(LLVM_ENABLE_PROJECTS | |||||
"mlir" | |||||
CACHE STRING "" FORCE) | |||||
set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm) | set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm) | ||||
add_mge_mlir_src_dep("third_party/llvm-project") | add_mge_mlir_src_dep("third_party/llvm-project") | ||||
@@ -91,6 +139,5 @@ set(MLIR_LLVM_INCLUDE_DIR | |||||
${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include | ${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include | ||||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include | ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include | ||||
${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include | ${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include | ||||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include | |||||
) | |||||
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include) | |||||
set(MLIR_TABLEGEN_EXE mlir-tblgen) | set(MLIR_TABLEGEN_EXE mlir-tblgen) |
@@ -1,54 +1,64 @@ | |||||
find_library(MAGICMIND_LIBRARY | |||||
NAMES libmagicmind.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "MAGICMIND library." ) | |||||
find_library( | |||||
MAGICMIND_LIBRARY | |||||
NAMES libmagicmind.so | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "MAGICMIND library.") | |||||
if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") | if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
endif() | endif() | ||||
get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH) | get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH) | ||||
find_path(MAGICMIND_INCLUDE_DIR | |||||
NAMES common.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to MAGICMIND include directory." ) | |||||
find_path( | |||||
MAGICMIND_INCLUDE_DIR | |||||
NAMES common.h | |||||
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to MAGICMIND include directory.") | |||||
if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") | if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
message(FATAL_ERROR "Can not find MAGICMIND Library") | |||||
endif() | endif() | ||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR | |||||
REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR | |||||
REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") | |||||
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH | |||||
REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") | |||||
string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||||
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||||
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||||
set(MAGICMIND_VERSION_STRING "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||||
string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" | |||||
MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") | |||||
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" | |||||
MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") | |||||
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" | |||||
MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") | |||||
set(MAGICMIND_VERSION_STRING | |||||
"${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") | |||||
add_library(libmagicmind SHARED IMPORTED) | add_library(libmagicmind SHARED IMPORTED) | ||||
set_target_properties(libmagicmind PROPERTIES | |||||
IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR} | |||||
) | |||||
set_target_properties( | |||||
libmagicmind PROPERTIES IMPORTED_LOCATION ${MAGICMIND_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR}) | |||||
message(STATUS "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})") | |||||
message( | |||||
STATUS | |||||
"Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})" | |||||
) | |||||
find_library(MAGICMIND_RUNTIME_LIBRARY | |||||
NAMES libmagicmind_runtime.so | |||||
PATHS "${__found_magicmind_root}/lib64" | |||||
) | |||||
find_library( | |||||
MAGICMIND_RUNTIME_LIBRARY | |||||
NAMES libmagicmind_runtime.so | |||||
PATHS "${__found_magicmind_root}/lib64") | |||||
if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") | if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||||
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") | |||||
else() | else() | ||||
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||||
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") | |||||
endif() | endif() | ||||
add_library(libmagicmind_runtime SHARED IMPORTED) | add_library(libmagicmind_runtime SHARED IMPORTED) | ||||
set_target_properties(libmagicmind_runtime PROPERTIES | |||||
IMPORTED_LOCATION ${MAGICMIND_RUNTIME_LIBRARY} | |||||
) | |||||
set_target_properties(libmagicmind_runtime PROPERTIES IMPORTED_LOCATION | |||||
${MAGICMIND_RUNTIME_LIBRARY}) |
@@ -1,77 +1,83 @@ | |||||
find_path(MKL_ROOT_DIR | |||||
include/mkl_cblas.h | |||||
PATHS | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||||
$ENV{MKLDIR} | |||||
/opt/intel/mkl/*/ | |||||
/opt/intel/cmkl/*/ | |||||
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal | |||||
) | |||||
find_path( | |||||
MKL_ROOT_DIR include/mkl_cblas.h | |||||
PATHS ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH} | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library | |||||
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32 | |||||
$ENV{MKLDIR} | |||||
/opt/intel/mkl/*/ | |||||
/opt/intel/cmkl/*/ | |||||
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal) | |||||
if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND") | if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find MKL") | |||||
message(FATAL_ERROR "Can not find MKL") | |||||
endif() | endif() | ||||
message(STATUS "Build with MKL in ${MKL_ROOT_DIR}") | message(STATUS "Build with MKL in ${MKL_ROOT_DIR}") | ||||
find_path(MKL_INCLUDE_DIR | |||||
mkl_cblas.h | |||||
PATHS | |||||
${MKL_ROOT_DIR}/include | |||||
${INCLUDE_INSTALL_DIR} | |||||
) | |||||
find_path(MKL_INCLUDE_DIR mkl_cblas.h PATHS ${MKL_ROOT_DIR}/include | |||||
${INCLUDE_INSTALL_DIR}) | |||||
option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON) | option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON) | ||||
if(MGE_MKL_USE_STATIC) | if(MGE_MKL_USE_STATIC) | ||||
find_library(MKL_CORE_LIBRARY | |||||
NAMES libmkl_core.a mkl_core.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library( | |||||
MKL_CORE_LIBRARY | |||||
NAMES libmkl_core.a mkl_core.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library(MKL_SEQUENTIAL_LIBRARY | |||||
NAMES libmkl_sequential.a mkl_sequential.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library( | |||||
MKL_SEQUENTIAL_LIBRARY | |||||
NAMES libmkl_sequential.a mkl_sequential.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
if(${MGE_ARCH} STREQUAL "x86_64") | |||||
find_library(MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
elseif(${MGE_ARCH} STREQUAL "i386") | |||||
find_library(MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
endif() | |||||
if(${MGE_ARCH} STREQUAL "x86_64") | |||||
find_library( | |||||
MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
elseif(${MGE_ARCH} STREQUAL "i386") | |||||
find_library( | |||||
MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
endif() | |||||
add_library(libmkl INTERFACE IMPORTED) | |||||
if(UNIX AND NOT APPLE) | |||||
target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group) | |||||
else() | |||||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
endif() | |||||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
add_library(libmkl INTERFACE IMPORTED) | |||||
if(UNIX AND NOT APPLE) | |||||
target_link_libraries( | |||||
libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||||
${MKL_IPL_LIBRARY} -Wl,--end-group) | |||||
else() | |||||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} | |||||
${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
endif() | |||||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
else() | else() | ||||
find_library(MKL_CORE_LIBRARY | |||||
NAMES libmkl_core.so libmkl_core.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library( | |||||
MKL_CORE_LIBRARY | |||||
NAMES libmkl_core.so libmkl_core.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library(MKL_SEQUENTIAL_LIBRARY | |||||
NAMES libmkl_sequential.so libmkl_sequential.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
find_library( | |||||
MKL_SEQUENTIAL_LIBRARY | |||||
NAMES libmkl_sequential.so libmkl_sequential.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
if(${MGE_ARCH} STREQUAL "x86_64") | |||||
find_library(MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
elseif(${MGE_ARCH} STREQUAL "x86_32") | |||||
find_library(MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
endif() | |||||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY}) | |||||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
if(${MGE_ARCH} STREQUAL "x86_64") | |||||
find_library( | |||||
MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
elseif(${MGE_ARCH} STREQUAL "x86_32") | |||||
find_library( | |||||
MKL_IPL_LIBRARY | |||||
NAMES libmkl_intel_32.so libmkl_intel_32.dylib | |||||
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/) | |||||
endif() | |||||
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} | |||||
${MKL_IPL_LIBRARY}) | |||||
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR}) | |||||
endif() | endif() | ||||
if(${MGE_ARCH} STREQUAL "x86_64") | if(${MGE_ARCH} STREQUAL "x86_64") | ||||
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||||
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64) | |||||
endif() | endif() |
@@ -1,70 +1,83 @@ | |||||
function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR) | function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR) | ||||
if(NOT ARGN) | |||||
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||||
return() | |||||
endif() | |||||
if(NOT ARGN) | |||||
message( | |||||
SEND_ERROR | |||||
"Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files") | |||||
return() | |||||
endif() | |||||
set(${SRCS}) | |||||
set(${HDRS}) | |||||
foreach(FIL ${ARGN}) | |||||
set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||||
get_filename_component(FIL_WE ${FIL} NAME_WE) | |||||
get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||||
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||||
set(${SRCS}) | |||||
set(${HDRS}) | |||||
foreach(FIL ${ARGN}) | |||||
set(ABS_FIL ${ROOT_DIR}/${FIL}) | |||||
get_filename_component(FIL_WE ${FIL} NAME_WE) | |||||
get_filename_component(FIL_DIR ${ABS_FIL} PATH) | |||||
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR}) | |||||
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||||
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||||
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") | |||||
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") | |||||
add_custom_command( | |||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||||
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||||
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} | |||||
ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||||
DEPENDS ${ABS_FIL} libprotobuf | |||||
COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||||
VERBATIM) | |||||
endforeach() | |||||
add_custom_command( | |||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" | |||||
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" | |||||
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} | |||||
-I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS} | |||||
DEPENDS ${ABS_FIL} libprotobuf | |||||
COMMENT "Running C++ protocol buffer compiler on ${FIL}" | |||||
VERBATIM) | |||||
endforeach() | |||||
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||||
set(${SRCS} ${${SRCS}} PARENT_SCOPE) | |||||
set(${HDRS} ${${HDRS}} PARENT_SCOPE) | |||||
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) | |||||
set(${SRCS} | |||||
${${SRCS}} | |||||
PARENT_SCOPE) | |||||
set(${HDRS} | |||||
${${HDRS}} | |||||
PARENT_SCOPE) | |||||
endfunction() | endfunction() | ||||
if(MGE_USE_SYSTEM_LIB) | if(MGE_USE_SYSTEM_LIB) | ||||
find_package(Protobuf) | |||||
if(Protobuf_FOUND) | |||||
add_library(libprotobuf INTERFACE) | |||||
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||||
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||||
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||||
set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||||
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||||
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||||
return() | |||||
endif() | |||||
find_package(Protobuf) | |||||
if(Protobuf_FOUND) | |||||
add_library(libprotobuf INTERFACE) | |||||
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES}) | |||||
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS}) | |||||
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY) | |||||
set(PROTOBUF_ROOT ${Protobuf_ROOT}) | |||||
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) | |||||
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS}) | |||||
return() | |||||
endif() | |||||
endif() | endif() | ||||
include(ExternalProject) | include(ExternalProject) | ||||
include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
set(PROTOBUF_DIR "${PROJECT_SOURCE_DIR}/third_party/protobuf" CACHE STRING "protobuf directory") | |||||
set(PROTOBUF_DIR | |||||
"${PROJECT_SOURCE_DIR}/third_party/protobuf" | |||||
CACHE STRING "protobuf directory") | |||||
set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf) | set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf) | ||||
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") | if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") | ||||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a) | |||||
else() | else() | ||||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||||
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a) | |||||
endif() | endif() | ||||
set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc) | set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc) | ||||
ExternalProject_add( | |||||
protobuf | |||||
SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||||
PREFIX ${PROTOBUF_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} -Dprotobuf_BUILD_EXAMPLES=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE} | |||||
) | |||||
ExternalProject_Add( | |||||
protobuf | |||||
SOURCE_DIR ${PROTOBUF_DIR}/cmake | |||||
PREFIX ${PROTOBUF_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} | |||||
-Dprotobuf_BUILD_EXAMPLES=OFF | |||||
-Dprotobuf_BUILD_TESTS=OFF | |||||
-DBUILD_SHARED_LIBS=OFF | |||||
-DCMAKE_POSITION_INDEPENDENT_CODE=ON | |||||
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE}) | |||||
set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include) | set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include) | ||||
file(MAKE_DIRECTORY ${PROTOBUF_INC}) | file(MAKE_DIRECTORY ${PROTOBUF_INC}) | ||||
@@ -72,19 +85,14 @@ file(MAKE_DIRECTORY ${PROTOBUF_INC}) | |||||
add_library(libprotobuf STATIC IMPORTED GLOBAL) | add_library(libprotobuf STATIC IMPORTED GLOBAL) | ||||
add_dependencies(libprotobuf protobuf) | add_dependencies(libprotobuf protobuf) | ||||
set_target_properties( | set_target_properties( | ||||
libprotobuf PROPERTIES | |||||
IMPORTED_LOCATION ${PROTOBUF_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include | |||||
) | |||||
libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include) | |||||
add_executable(protoc IMPORTED GLOBAL) | add_executable(protoc IMPORTED GLOBAL) | ||||
add_dependencies(protoc protobuf) | add_dependencies(protoc protobuf) | ||||
set_target_properties( | |||||
protoc PROPERTIES | |||||
IMPORTED_LOCATION ${PROTOBUF_BUILD_DIR}/bin/protoc | |||||
) | |||||
set_target_properties(protoc PROPERTIES IMPORTED_LOCATION | |||||
${PROTOBUF_BUILD_DIR}/bin/protoc) | |||||
set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR}) | set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR}) | ||||
set(PROTOBUF_PROTOC_EXECUTABLE protoc) | set(PROTOBUF_PROTOC_EXECUTABLE protoc) | ||||
set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include) | set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include) | ||||
@@ -1,28 +1,34 @@ | |||||
if(NOT DEFINED HIP_PATH) | if(NOT DEFINED HIP_PATH) | ||||
if(NOT DEFINED ENV{HIP_PATH}) | |||||
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") | |||||
else() | |||||
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") | |||||
endif() | |||||
if(NOT DEFINED ENV{HIP_PATH}) | |||||
set(HIP_PATH | |||||
"/opt/rocm/hip" | |||||
CACHE PATH "Path to which HIP has been installed") | |||||
else() | |||||
set(HIP_PATH | |||||
$ENV{HIP_PATH} | |||||
CACHE PATH "Path to which HIP has been installed") | |||||
endif() | |||||
endif() | endif() | ||||
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) | set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) | ||||
find_package(HIP QUIET) | find_package(HIP QUIET) | ||||
if (HIP_FOUND) | |||||
message(STATUS "Found HIP: " ${HIP_VERSION}) | |||||
if(HIP_FOUND) | |||||
message(STATUS "Found HIP: " ${HIP_VERSION}) | |||||
else() | else() | ||||
message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.") | |||||
message( | |||||
FATAL_ERROR | |||||
"Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location." | |||||
) | |||||
endif() | endif() | ||||
if (${HIP_VERSION} VERSION_LESS 3.0) | |||||
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||||
if(${HIP_VERSION} VERSION_LESS 3.0) | |||||
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.") | |||||
endif() | endif() | ||||
macro(hipconfig_get_option variable option) | macro(hipconfig_get_option variable option) | ||||
if(NOT DEFINED ${variable}) | |||||
execute_process( | |||||
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||||
OUTPUT_VARIABLE ${variable}) | |||||
endif() | |||||
if(NOT DEFINED ${variable}) | |||||
execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option} | |||||
OUTPUT_VARIABLE ${variable}) | |||||
endif() | |||||
endmacro() | endmacro() | ||||
hipconfig_get_option(HIP_COMPILER "--compiler") | hipconfig_get_option(HIP_COMPILER "--compiler") | ||||
@@ -31,30 +37,33 @@ hipconfig_get_option(HIP_CPP_CONFIG "--cpp_config") | |||||
separate_arguments(HIP_CPP_CONFIG) | separate_arguments(HIP_CPP_CONFIG) | ||||
foreach(hip_config_item ${HIP_CPP_CONFIG}) | foreach(hip_config_item ${HIP_CPP_CONFIG}) | ||||
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||||
if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||||
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||||
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__") | |||||
if(${hip_config_item} STREQUAL "-D${macro_name}=") | |||||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n") | |||||
set(HIP_CPP_UNDEFINE | |||||
"${HIP_CPP_UNDEFINE}\ | |||||
#ifdef ${macro_name}\n#undef ${macro_name}\n\ | #ifdef ${macro_name}\n#undef ${macro_name}\n\ | ||||
#else\n#error\n\ | #else\n#error\n\ | ||||
#endif\n") | #endif\n") | ||||
elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||||
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\ | |||||
elseif(${hip_config_item} STREQUAL "-D${macro_name}") | |||||
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n") | |||||
set(HIP_CPP_UNDEFINE | |||||
"${HIP_CPP_UNDEFINE}\ | |||||
#ifdef ${macro_name}\n#undef ${macro_name}\n\ | #ifdef ${macro_name}\n#undef ${macro_name}\n\ | ||||
#else\n#error\n\ | #else\n#error\n\ | ||||
#endif\n") | #endif\n") | ||||
endif() | |||||
endforeach() | |||||
endif() | |||||
endforeach() | |||||
endforeach() | endforeach() | ||||
message(STATUS "Using HIP compiler ${HIP_COMPILER}") | message(STATUS "Using HIP compiler ${HIP_COMPILER}") | ||||
if(${HIP_COMPILER} STREQUAL "hcc") | if(${HIP_COMPILER} STREQUAL "hcc") | ||||
set(MGE_ROCM_LIBS hip_hcc) | |||||
message(WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||||
elseif (${HIP_COMPILER} STREQUAL "clang") | |||||
set(MGE_ROCM_LIBS amdhip64) | |||||
set(MGE_ROCM_LIBS hip_hcc) | |||||
message( | |||||
WARNING "hcc is not well supported, please modify link.txt to link with hipcc") | |||||
elseif(${HIP_COMPILER} STREQUAL "clang") | |||||
set(MGE_ROCM_LIBS amdhip64) | |||||
endif() | endif() | ||||
list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand) | list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand) | ||||
@@ -63,26 +72,28 @@ set(HIP_INCLUDE_DIR ${HIP_ROOT_DIR}/../include) | |||||
set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib) | set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib) | ||||
function(find_rocm_library name dirname include library) | function(find_rocm_library name dirname include library) | ||||
find_path(${name}_LIBRARY_DIR | |||||
NAMES ${library} | |||||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
PATH_SUFFIXES lib lib/x86_64 | |||||
DOC "Path to ${name} library directory") | |||||
find_path( | |||||
${name}_LIBRARY_DIR | |||||
NAMES ${library} | |||||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
PATH_SUFFIXES lib lib/x86_64 | |||||
DOC "Path to ${name} library directory") | |||||
if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||||
message(FATAL_ERROR "Can not find ${name} library") | |||||
endif() | |||||
if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$") | |||||
message(FATAL_ERROR "Can not find ${name} library") | |||||
endif() | |||||
find_path(${name}_INCLUDE_DIR | |||||
NAMES ${include} | |||||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
PATH_SUFFIXES include | |||||
DOC "Path to ${name} include directory") | |||||
find_path( | |||||
${name}_INCLUDE_DIR | |||||
NAMES ${include} | |||||
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}" | |||||
PATH_SUFFIXES include | |||||
DOC "Path to ${name} include directory") | |||||
if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||||
message(FATAL_ERROR "Can not find ${name} include") | |||||
endif() | |||||
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||||
if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$") | |||||
message(FATAL_ERROR "Can not find ${name} include") | |||||
endif() | |||||
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}") | |||||
endfunction() | endfunction() | ||||
find_rocm_library(MIOPEN miopen miopen libMIOpen.so) | find_rocm_library(MIOPEN miopen miopen libMIOpen.so) | ||||
@@ -1,166 +1,189 @@ | |||||
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||||
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||||
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "") | |||||
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR}) | |||||
endif() | endif() | ||||
if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
find_library(TRT_LIBRARY | |||||
NAMES libnvinfer_static.a nvinfer.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT library." ) | |||||
find_library(TRT_PLUGIN_LIBRARY | |||||
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT plugin library." ) | |||||
find_library( | |||||
TRT_LIBRARY | |||||
NAMES libnvinfer_static.a nvinfer.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT library.") | |||||
find_library( | |||||
TRT_PLUGIN_LIBRARY | |||||
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT plugin library.") | |||||
else() | else() | ||||
find_library(TRT_LIBRARY | |||||
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT library." ) | |||||
find_library(TRT_PLUGIN_LIBRARY | |||||
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT plugin library." ) | |||||
find_library( | |||||
TRT_LIBRARY | |||||
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT library.") | |||||
find_library( | |||||
TRT_PLUGIN_LIBRARY | |||||
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll | |||||
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX} | |||||
HINTS ${ALTER_LIBRARY_PATHS} | |||||
PATH_SUFFIXES lib lib64 | |||||
DOC "TRT plugin library.") | |||||
endif() | endif() | ||||
if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND") | if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
) | |||||
endif() | endif() | ||||
if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND") | if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
) | |||||
endif() | endif() | ||||
get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH) | get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH) | ||||
find_path(TRT_INCLUDE_DIR | |||||
NAMES NvInfer.h | |||||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to TRT include directory." ) | |||||
find_path(TRT_PLUGIN_INCLUDE_DIR | |||||
NAMES NvInferPlugin.h | |||||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to TRT plugin include directory." ) | |||||
find_path( | |||||
TRT_INCLUDE_DIR | |||||
NAMES NvInfer.h | |||||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to TRT include directory.") | |||||
find_path( | |||||
TRT_PLUGIN_INCLUDE_DIR | |||||
NAMES NvInferPlugin.h | |||||
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root} | |||||
PATH_SUFFIXES include | |||||
DOC "Path to TRT plugin include directory.") | |||||
if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND") | if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
) | |||||
endif() | endif() | ||||
if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND") | if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND") | ||||
message(FATAL_ERROR "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env") | |||||
message( | |||||
FATAL_ERROR | |||||
"Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env" | |||||
) | |||||
endif() | endif() | ||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR | |||||
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR | |||||
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH | |||||
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
if (TensorRT_MAJOR STREQUAL "") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
if(TensorRT_MAJOR STREQUAL "") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR | |||||
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR | |||||
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") | |||||
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH | |||||
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") | |||||
endif() | endif() | ||||
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||||
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||||
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||||
set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||||
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" | |||||
TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") | |||||
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" | |||||
TensorRT_VERSION_MINOR "${TensorRT_MINOR}") | |||||
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" | |||||
TensorRT_VERSION_PATCH "${TensorRT_PATCH}") | |||||
set(TRT_VERSION_STRING | |||||
"${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") | |||||
if(MGE_CUDA_USE_STATIC) | if(MGE_CUDA_USE_STATIC) | ||||
add_library(libnvinfer STATIC IMPORTED) | |||||
add_library(libnvinfer_plugin STATIC IMPORTED) | |||||
add_library(libnvinfer STATIC IMPORTED) | |||||
add_library(libnvinfer_plugin STATIC IMPORTED) | |||||
else() | else() | ||||
add_library(libnvinfer SHARED IMPORTED) | |||||
add_library(libnvinfer_plugin SHARED IMPORTED) | |||||
add_library(libnvinfer SHARED IMPORTED) | |||||
add_library(libnvinfer_plugin SHARED IMPORTED) | |||||
endif() | endif() | ||||
set_target_properties(libnvinfer PROPERTIES | |||||
IMPORTED_LOCATION ${TRT_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR} | |||||
) | |||||
set_target_properties(libnvinfer_plugin PROPERTIES | |||||
IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR} | |||||
) | |||||
set_target_properties( | |||||
libnvinfer PROPERTIES IMPORTED_LOCATION ${TRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES | |||||
${TRT_INCLUDE_DIR}) | |||||
set_target_properties( | |||||
libnvinfer_plugin PROPERTIES IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR}) | |||||
message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||||
message( | |||||
STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})") | |||||
if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7) | ||||
if(MGE_CUDA_USE_STATIC) | |||||
find_library(LIBMYELIN_COMPILER | |||||
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||||
PATHS ${__found_trt_root}/lib | |||||
) | |||||
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||||
else() | |||||
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||||
endif() | |||||
add_library(libmyelin_compiler STATIC IMPORTED) | |||||
set_target_properties(libmyelin_compiler PROPERTIES | |||||
IMPORTED_LOCATION ${LIBMYELIN_COMPILER} | |||||
) | |||||
if(MGE_CUDA_USE_STATIC) | |||||
find_library( | |||||
LIBMYELIN_COMPILER | |||||
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib | |||||
PATHS ${__found_trt_root}/lib) | |||||
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library") | |||||
else() | |||||
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}") | |||||
endif() | |||||
add_library(libmyelin_compiler STATIC IMPORTED) | |||||
set_target_properties(libmyelin_compiler PROPERTIES IMPORTED_LOCATION | |||||
${LIBMYELIN_COMPILER}) | |||||
find_library(LIBMYELIN_EXECUTOR | |||||
NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||||
PATHS ${__found_trt_root}/lib | |||||
) | |||||
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||||
endif() | |||||
add_library(libmyelin_executor STATIC IMPORTED) | |||||
set_target_properties(libmyelin_executor PROPERTIES | |||||
IMPORTED_LOCATION ${LIBMYELIN_EXECUTOR} | |||||
) | |||||
find_library( | |||||
LIBMYELIN_EXECUTOR | |||||
NAMES libmyelin_executor_static.a myelin_executor_static.lib | |||||
PATHS ${__found_trt_root}/lib) | |||||
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}") | |||||
endif() | |||||
add_library(libmyelin_executor STATIC IMPORTED) | |||||
set_target_properties(libmyelin_executor PROPERTIES IMPORTED_LOCATION | |||||
${LIBMYELIN_EXECUTOR}) | |||||
find_library(LIBMYELIN_PATTERN_RUNTIME | |||||
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||||
PATHS ${__found_trt_root}/lib | |||||
) | |||||
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||||
endif() | |||||
add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||||
set_target_properties(libmyelin_pattern_runtime PROPERTIES | |||||
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME} | |||||
) | |||||
find_library( | |||||
LIBMYELIN_PATTERN_RUNTIME | |||||
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib | |||||
PATHS ${__found_trt_root}/lib) | |||||
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library") | |||||
else() | |||||
message( | |||||
STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}") | |||||
endif() | |||||
add_library(libmyelin_pattern_runtime STATIC IMPORTED) | |||||
set_target_properties(libmyelin_pattern_runtime | |||||
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME}) | |||||
find_library(LIBMYELIN_PATTERN_LIBRARY | |||||
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||||
PATHS ${__found_trt_root}/lib | |||||
) | |||||
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||||
endif() | |||||
add_library(libmyelin_pattern_library STATIC IMPORTED) | |||||
set_target_properties(libmyelin_pattern_library PROPERTIES | |||||
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY} | |||||
) | |||||
find_library( | |||||
LIBMYELIN_PATTERN_LIBRARY | |||||
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib | |||||
PATHS ${__found_trt_root}/lib) | |||||
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library") | |||||
else() | else() | ||||
find_library(LIBMYELIN_SHARED | |||||
NAMES libmyelin.so myelin.dll | |||||
PATHS ${__found_trt_root}/lib | |||||
) | |||||
message( | |||||
STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}") | |||||
endif() | |||||
add_library(libmyelin_pattern_library STATIC IMPORTED) | |||||
set_target_properties(libmyelin_pattern_library | |||||
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY}) | |||||
else() | |||||
find_library( | |||||
LIBMYELIN_SHARED | |||||
NAMES libmyelin.so myelin.dll | |||||
PATHS ${__found_trt_root}/lib) | |||||
if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||||
endif() | |||||
add_library(libmyelin SHARED IMPORTED) | |||||
set_target_properties(libmyelin PROPERTIES | |||||
IMPORTED_LOCATION ${LIBMYELIN_SHARED} | |||||
) | |||||
if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND") | |||||
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library") | |||||
else() | |||||
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}") | |||||
endif() | endif() | ||||
add_library(libmyelin SHARED IMPORTED) | |||||
set_target_properties(libmyelin PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_SHARED}) | |||||
endif() | |||||
endif() | endif() |
@@ -1,17 +1,26 @@ | |||||
include(ExternalProject) | include(ExternalProject) | ||||
include(GNUInstallDirs) | include(GNUInstallDirs) | ||||
set(ZMQ_DIR ${PROJECT_SOURCE_DIR}/third_party/libzmq CACHE STRING "ZMQ directory") | |||||
set(ZMQ_DIR | |||||
${PROJECT_SOURCE_DIR}/third_party/libzmq | |||||
CACHE STRING "ZMQ directory") | |||||
set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq) | set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq) | ||||
set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a) | set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a) | ||||
ExternalProject_add( | |||||
zmq | |||||
SOURCE_DIR ${ZMQ_DIR} | |||||
PREFIX ${ZMQ_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} -DWITH_PERF_TOOL=OFF -DZMQ_BUILD_TESTS=OFF -DENABLE_CPACK=OFF -DENABLE_CURVE=OFF | |||||
BUILD_BYPRODUCTS ${ZMQ_LIB} | |||||
) | |||||
ExternalProject_Add( | |||||
zmq | |||||
SOURCE_DIR ${ZMQ_DIR} | |||||
PREFIX ${ZMQ_BUILD_DIR} | |||||
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} | |||||
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} | |||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} | |||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} | |||||
-DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} | |||||
-DWITH_PERF_TOOL=OFF | |||||
-DZMQ_BUILD_TESTS=OFF | |||||
-DENABLE_CPACK=OFF | |||||
-DENABLE_CURVE=OFF | |||||
BUILD_BYPRODUCTS ${ZMQ_LIB}) | |||||
set(ZMQ_INC ${ZMQ_BUILD_DIR}/include) | set(ZMQ_INC ${ZMQ_BUILD_DIR}/include) | ||||
include_directories(${ZMQ_INC}) | include_directories(${ZMQ_INC}) | ||||
@@ -19,8 +28,5 @@ file(MAKE_DIRECTORY ${ZMQ_INC}) | |||||
add_library(libzmq STATIC IMPORTED GLOBAL) | add_library(libzmq STATIC IMPORTED GLOBAL) | ||||
add_dependencies(libzmq zmq) | add_dependencies(libzmq zmq) | ||||
set_target_properties( | |||||
libzmq PROPERTIES | |||||
IMPORTED_LOCATION ${ZMQ_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC} | |||||
) | |||||
set_target_properties(libzmq PROPERTIES IMPORTED_LOCATION ${ZMQ_LIB} | |||||
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC}) |
@@ -4,66 +4,61 @@ set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py) | |||||
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/) | set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/) | ||||
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn) | file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn) | ||||
add_custom_command( | add_custom_command( | ||||
OUTPUT | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} | |||||
tmp_unuse.log --write-cppjson ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
VERBATIM | |||||
) | |||||
list(APPEND OPR_PARAM_DEFS_OUTS | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
COMMAND | |||||
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
${OPR_PARAM_DEFS_SRCS} tmp_unuse.log --write-cppjson | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h | ||||
) | |||||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
VERBATIM) | |||||
list(APPEND OPR_PARAM_DEFS_OUTS ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h | |||||
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h) | |||||
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | ||||
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}) | ||||
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common) | file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common) | ||||
add_custom_command( | add_custom_command( | ||||
OUTPUT | |||||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} | |||||
--enumv ${OPR_PARAM_DEFS_SRCS} | |||||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
VERBATIM | |||||
) | |||||
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
COMMAND | |||||
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} --enumv | |||||
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT} | |||||
VERBATIM) | |||||
list(APPEND OPR_PARAM_DEFS_OUTS | list(APPEND OPR_PARAM_DEFS_OUTS | ||||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh | |||||
) | |||||
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh) | |||||
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR}) | ||||
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h") | |||||
install( | |||||
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn | |||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} | |||||
FILES_MATCHING | |||||
PATTERN "*.h") | |||||
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS}) | ||||
add_library(opr_param_defs INTERFACE) | add_library(opr_param_defs INTERFACE) | ||||
target_include_directories(opr_param_defs | target_include_directories(opr_param_defs | ||||
INTERFACE | |||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
) | |||||
foreach (INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||||
target_include_directories(opr_param_defs | |||||
INTERFACE $<BUILD_INTERFACE:${INCPATH}> | |||||
) | |||||
INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>) | |||||
foreach(INCPATH IN LISTS OPR_PARAM_DEFS_INC) | |||||
target_include_directories(opr_param_defs INTERFACE $<BUILD_INTERFACE:${INCPATH}>) | |||||
endforeach() | endforeach() | ||||
add_dependencies(opr_param_defs _opr_param_defs) | add_dependencies(opr_param_defs _opr_param_defs) | ||||
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS}) | ||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
add_library(cutlass INTERFACE) | |||||
target_include_directories(cutlass | |||||
INTERFACE | |||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||||
add_library(cutlass INTERFACE) | |||||
target_include_directories( | |||||
cutlass | |||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>) | |||||
endif() | endif() | ||||
if(MGE_WITH_TEST) | if(MGE_WITH_TEST) | ||||
add_subdirectory(test) | |||||
add_subdirectory(test) | |||||
endif() | endif() | ||||
add_subdirectory(src) | add_subdirectory(src) | ||||
@@ -1,6 +1,8 @@ | |||||
add_library(atlas-stub STATIC src/libatlas-wrap.cpp) | add_library(atlas-stub STATIC src/libatlas-wrap.cpp) | ||||
target_include_directories(atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
target_include_directories( | |||||
atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS}) | ||||
add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp) | add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp) | ||||
target_include_directories(acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) | |||||
target_include_directories( | |||||
acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) |
@@ -1,26 +1,27 @@ | |||||
file (GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||||
file (GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||||
file(GLOB_RECURSE CUDA_STUB src/libcuda.cpp) | |||||
file(GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp) | |||||
if(MGE_WITH_CUDA_STUB) | if(MGE_WITH_CUDA_STUB) | ||||
list(APPEND STUB_SRC ${CUDA_STUB}) | |||||
list(APPEND STUB_SRC ${CUDA_STUB}) | |||||
endif() | endif() | ||||
if(MGE_WITH_NVRTC_STUB) | if(MGE_WITH_NVRTC_STUB) | ||||
list(APPEND STUB_SRC ${NVRTC_STUB}) | |||||
list(APPEND STUB_SRC ${NVRTC_STUB}) | |||||
endif() | endif() | ||||
if(MSVC OR WIN32) | if(MSVC OR WIN32) | ||||
add_library (cuda-stub STATIC ${STUB_SRC}) | |||||
add_library(cuda-stub STATIC ${STUB_SRC}) | |||||
else() | else() | ||||
add_library (cuda-stub SHARED ${STUB_SRC}) | |||||
add_library(cuda-stub SHARED ${STUB_SRC}) | |||||
endif() | endif() | ||||
set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub) | set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub) | ||||
target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL) | target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL) | ||||
if (MSVC OR WIN32) | |||||
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||||
if(MSVC OR WIN32) | |||||
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined) | |||||
else() | else() | ||||
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||||
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined) | |||||
endif() | endif() | ||||
target_include_directories(cuda-stub PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||||
install (TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) | |||||
target_include_directories(cuda-stub | |||||
PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>) | |||||
install(TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS}) |
@@ -12,6 +12,7 @@ | |||||
#pragma once | #pragma once | ||||
#include "megbrain_build_config.h" | #include "megbrain_build_config.h" | ||||
#include "megdnn/oprs/base.h" | |||||
#if MGB_ENABLE_GETENV | #if MGB_ENABLE_GETENV | ||||
#define MGB_GETENV ::std::getenv | #define MGB_GETENV ::std::getenv | ||||
@@ -36,6 +37,11 @@ bool has_available_algo(Opr* opr, Args&&... args) { | |||||
return !all_algos.empty(); | return !all_algos.empty(); | ||||
} | } | ||||
//! Query the heuristic algorithm choice of \p opr and report whether it is
//! something other than a naive implementation.
//!
//! \param opr  operator whose get_algorithm_info_heuristic() is called
//! \param args arguments forwarded unchanged to get_algorithm_info_heuristic()
//! \return true when the returned algorithm info does NOT have the
//!         Attribute::NAIVE bit set in its `attribute` field, false otherwise
template <class Opr, typename... Args> | |||||
bool has_no_naive_heuristic_algo(Opr* opr, Args&&... args) { | |||||
auto&& algo = opr->get_algorithm_info_heuristic(std::forward<Args>(args)...); | |||||
// mask out the NAIVE bit; a non-zero result means the heuristic picked a naive algo
return !static_cast<bool>(algo.attribute & detail::Algorithm::Attribute::NAIVE); | |||||
} | |||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} | // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} |
@@ -1936,6 +1936,119 @@ protected: | |||||
const TensorLayout& grad_s, size_t workspace_in_bytes); | const TensorLayout& grad_s, size_t workspace_in_bytes); | ||||
}; | }; | ||||
//! Common base class of the LayerNorm forward and backward operators.
//!
//! Attaches the `LayerNorm` param type via DEF_OPR_PARAM and declares the
//! layout helpers that the derived operators can call.
class LayerNormBase : public OperatorBase { | |||||
DEF_OPR_IMPL_CTOR(LayerNormBase, OperatorBase); | |||||
DEF_OPR_PARAM(LayerNorm); | |||||
protected: | |||||
//! Layout deduction helper: \p dst, \p mean and \p rstd are taken by
//! non-const reference and are presumably filled in from the input layouts
//! (implementation not visible here -- confirm).
void deduce_layout_fwd( | |||||
const TensorLayout& data, const TensorLayout& weight, | |||||
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||||
TensorLayout& rstd); | |||||
//! Layout validation helper: takes all six layouts as const references.
//! NOTE(review): the exact checks performed live in the implementation file.
void check_layout_fwd( | |||||
const TensorLayout& data, const TensorLayout& weight, | |||||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
const TensorLayout& rstd); | |||||
}; | |||||
//! Abstract forward LayerNorm operator.
//!
//! exec() consumes three input tensors (data, weight, bias) and writes three
//! output tensors (dst, mean, rstd). The trailing `3, 3` in DEF_OPR_IMPL
//! presumably encode these input/output counts -- confirm against the macro.
class LayerNormForward : public LayerNormBase { | |||||
DEF_OPR_IMPL(LayerNormForward, LayerNormBase, 3, 3); | |||||
public: | |||||
//! Run the operator; pure virtual, implemented per backend.
//! \param workspace scratch buffer; see get_workspace_in_bytes()
virtual void exec( | |||||
_megdnn_tensor_in data, _megdnn_tensor_in weight, _megdnn_tensor_in bias, | |||||
_megdnn_tensor_out dst, _megdnn_tensor_out mean, _megdnn_tensor_out rstd, | |||||
_megdnn_workspace workspace) = 0; | |||||
//! Deduce the output layouts (\p dst, \p mean, \p rstd are output parameters)
//! from the input layouts.
void deduce_layout( | |||||
const TensorLayout& data, const TensorLayout& weight, | |||||
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean, | |||||
TensorLayout& rstd); | |||||
//! \return workspace size in bytes for the given layouts; pure virtual.
virtual size_t get_workspace_in_bytes( | |||||
const TensorLayout& data, const TensorLayout& weight, | |||||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
const TensorLayout& rstd) = 0; | |||||
protected: | |||||
//! Pre-execution check helper taking all layouts plus the size of the
//! workspace actually supplied. Intended for use by implementations
//! (protected); the concrete checks are not visible in this header.
void check_exec( | |||||
const TensorLayout& data, const TensorLayout& weight, | |||||
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean, | |||||
const TensorLayout& rstd, size_t workspace_in_bytes); | |||||
}; | |||||
//! Short alias: `LayerNorm` names the forward operator.
using LayerNorm = LayerNormForward; | |||||
//! Abstract backward LayerNorm operator.
//!
//! exec() consumes five input tensors (diff, data, weight, mean, rstd) and
//! writes three output tensors (ddata, dweight, dbias). The trailing `5, 3`
//! in DEF_OPR_IMPL presumably encode these counts -- confirm against the macro.
class LayerNormBackward : public LayerNormBase { | |||||
DEF_OPR_IMPL(LayerNormBackward, LayerNormBase, 5, 3); | |||||
public: | |||||
//! Run the operator; pure virtual, implemented per backend.
//! \param workspace scratch buffer; see get_workspace_in_bytes()
virtual void exec( | |||||
_megdnn_tensor_in diff, _megdnn_tensor_in data, _megdnn_tensor_in weight, | |||||
_megdnn_tensor_in mean, _megdnn_tensor_in rstd, _megdnn_tensor_out ddata, | |||||
_megdnn_tensor_out dweight, _megdnn_tensor_out dbias, | |||||
_megdnn_workspace workspace) = 0; | |||||
//! Deduce the output layouts (\p ddata, \p dweight, \p dbias are output
//! parameters) from the five input layouts.
void deduce_layout( | |||||
const TensorLayout& diff, const TensorLayout& data, | |||||
const TensorLayout& weight, const TensorLayout& mean, | |||||
const TensorLayout& rstd, TensorLayout& ddata, TensorLayout& dweight, | |||||
TensorLayout& dbias); | |||||
//! \return workspace size in bytes for the given layouts; pure virtual.
virtual size_t get_workspace_in_bytes( | |||||
const TensorLayout& diff, const TensorLayout& data, | |||||
const TensorLayout& weight, const TensorLayout& mean, | |||||
const TensorLayout& rstd, const TensorLayout& ddata, | |||||
const TensorLayout& dweight, const TensorLayout& dbias) = 0; | |||||
protected: | |||||
//! Pre-execution check helper taking all layouts plus the size of the
//! workspace actually supplied; the concrete checks are not visible here.
void check_exec( | |||||
const TensorLayout& diff, const TensorLayout& data, | |||||
const TensorLayout& weight, const TensorLayout& mean, | |||||
const TensorLayout& rstd, const TensorLayout& ddata, | |||||
const TensorLayout& dweight, const TensorLayout& dbias, | |||||
size_t workspace_in_bytes); | |||||
}; | |||||
//! Common base class of the Dropout forward and backward operators; it only
//! attaches the `Dropout` param type via DEF_OPR_PARAM.
class DropoutBase : public OperatorBase { | |||||
DEF_OPR_IMPL_CTOR(DropoutBase, OperatorBase); | |||||
DEF_OPR_PARAM(Dropout); | |||||
}; | |||||
//! Abstract forward Dropout operator.
//!
//! exec() consumes one input tensor (inp) and writes two output tensors
//! (oup, mask). The trailing `1, 2` in DEF_OPR_IMPL presumably encode these
//! counts -- confirm against the macro.
class DropoutForward : public DropoutBase { | |||||
DEF_OPR_IMPL(DropoutForward, DropoutBase, 1, 2); | |||||
public: | |||||
//! Deduce the layouts of \p oup and \p mask (output parameters) from \p inp.
void deduce_layout(const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask); | |||||
//! Run the operator; pure virtual, implemented per backend.
//! \param workspace scratch buffer; see get_workspace_in_bytes()
virtual void exec( | |||||
_megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask, | |||||
_megdnn_workspace workspace) = 0; | |||||
//! \return workspace size in bytes for the given layouts; pure virtual.
virtual size_t get_workspace_in_bytes( | |||||
const TensorLayout& inp, const TensorLayout& oup, | |||||
const TensorLayout& mask) = 0; | |||||
//! \return size in bytes of the mask for input layout \p inp; pure virtual,
//!         so the mask size is backend-defined.
virtual size_t get_mask_size_in_bytes(const TensorLayout& inp) = 0; | |||||
protected: | |||||
//! Pre-execution check helper taking all layouts plus the size of the
//! workspace actually supplied; the concrete checks are not visible here.
void check_exec( | |||||
const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask, | |||||
size_t workspace_in_bytes); | |||||
}; | |||||
//! Short alias: `Dropout` names the forward operator.
using Dropout = DropoutForward; | |||||
//! Abstract backward Dropout operator.
//!
//! exec() consumes two input tensors (doup, mask) and writes one output
//! tensor (dinp). The trailing `2, 1` in DEF_OPR_IMPL presumably encode these
//! counts -- confirm against the macro.
class DropoutBackward : public DropoutBase { | |||||
DEF_OPR_IMPL(DropoutBackward, DropoutBase, 2, 1); | |||||
public: | |||||
//! Deduce the layout of \p dinp (output parameter) from \p doup and \p mask.
void deduce_layout( | |||||
const TensorLayout& doup, const TensorLayout& mask, TensorLayout& dinp); | |||||
//! Run the operator; pure virtual, implemented per backend.
//! \param workspace scratch buffer; see get_workspace_in_bytes()
virtual void exec( | |||||
_megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp, | |||||
_megdnn_workspace workspace) = 0; | |||||
//! \return workspace size in bytes for the given layouts; pure virtual.
virtual size_t get_workspace_in_bytes( | |||||
const TensorLayout& doup, const TensorLayout& mask, | |||||
const TensorLayout& dinp) = 0; | |||||
protected: | |||||
//! Pre-execution check helper taking all layouts plus the size of the
//! workspace actually supplied; the concrete checks are not visible here.
void check_exec( | |||||
const TensorLayout& doup, const TensorLayout& mask, | |||||
const TensorLayout& dinp, size_t workspace_in_bytes); | |||||
}; | |||||
} // namespace megdnn | } // namespace megdnn | ||||
#include "megdnn/internal/opr_header_epilogue.h" | #include "megdnn/internal/opr_header_epilogue.h" | ||||
@@ -1212,3 +1212,15 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'), | |||||
member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES] | ||||
) | ) | ||||
) | ) | ||||
# Param definition 'LayerNorm' with four fields and their default values:
#   affine (bool, 'true'), eps (float32, '1e-5f'),
#   normalized_dim (uint64, '1'), normalized_size (uint64, '1').
# NOTE(review): the semantics of normalized_dim / normalized_size are not
# visible here -- confirm against the operator implementation.
(pdef('LayerNorm') | |||||
.add_fields('bool', 'affine', 'true') | |||||
.add_fields('float32', 'eps', '1e-5f') | |||||
.add_fields('uint64', 'normalized_dim', '1') | |||||
.add_fields('uint64', 'normalized_size', '1') | |||||
) | |||||
# Param definition 'Dropout' with two fields and their default values:
#   drop_prob (float32, '0') and seed (uint64, '0').
(pdef('Dropout') | |||||
.add_fields('float32', 'drop_prob', '0') | |||||
.add_fields('uint64', 'seed', '0') | |||||
) | |||||
@@ -5,168 +5,190 @@ file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp) | |||||
list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h) | ||||
if(NOT ${MGE_ARCH} STREQUAL "naive") | if(NOT ${MGE_ARCH} STREQUAL "naive") | ||||
file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||||
file(GLOB_RECURSE SOURCES_ fallback/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
if(${MGE_ARCH} STREQUAL "fallback") | |||||
message(WARNING "build only with fallback") | |||||
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | list(APPEND SOURCES ${SOURCES_}) | ||||
if(${MGE_ARCH} STREQUAL "fallback") | |||||
message(WARNING "build only with fallback") | |||||
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386") | |||||
file(GLOB_RECURSE SOURCES_ x86/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
if(NOT MSVC) | |||||
file(GLOB_RECURSE SOURCES_ x86/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
endif() | |||||
elseif(${MGE_ARCH} STREQUAL "armv7") | |||||
file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
elseif(${MGE_ARCH} STREQUAL "aarch64") | |||||
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
if(NOT MSVC) | |||||
file(GLOB_RECURSE SOURCES_ x86/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
endif() | endif() | ||||
elseif(${MGE_ARCH} STREQUAL "armv7") | |||||
file(GLOB_RECURSE SOURCES_ armv7/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ armv7/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
elseif(${MGE_ARCH} STREQUAL "aarch64") | |||||
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ aarch64/*.S) | |||||
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
endif() | |||||
endif() | endif() | ||||
if(MGE_WITH_MIDOUT_PROFILE) | if(MGE_WITH_MIDOUT_PROFILE) | ||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||||
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp) | |||||
endif() | endif() | ||||
############################################################################### | |||||
# ###################################################################################### | |||||
# HIP_COMPILE | # HIP_COMPILE | ||||
############################################################################### | |||||
macro (HIP_COMPILE _hip_target _hip_objs) | |||||
# Separate the sources from the options | |||||
HIP_GET_SOURCES_AND_OPTIONS(_sources | |||||
_cmake_options | |||||
_hipcc_options | |||||
_hcc_options | |||||
_nvcc_options | |||||
${ARGN}) | |||||
HIP_PREPARE_TARGET_COMMANDS(${_hip_target} | |||||
OBJ _generated_files _source_files ${_sources} ${_cmake_options} | |||||
HIPCC_OPTIONS ${_hipcc_options} | |||||
HCC_OPTIONS ${_hcc_options} | |||||
NVCC_OPTIONS ${_nvcc_options}) | |||||
if(_source_files) | |||||
list(REMOVE_ITEM _sources ${_source_files}) | |||||
endif() | |||||
# ###################################################################################### | |||||
macro(HIP_COMPILE _hip_target _hip_objs) | |||||
# Separate the sources from the options | |||||
hip_get_sources_and_options(_sources _cmake_options _hipcc_options _hcc_options | |||||
_nvcc_options ${ARGN}) | |||||
hip_prepare_target_commands( | |||||
${_hip_target} | |||||
OBJ | |||||
_generated_files | |||||
_source_files | |||||
${_sources} | |||||
${_cmake_options} | |||||
HIPCC_OPTIONS | |||||
${_hipcc_options} | |||||
HCC_OPTIONS | |||||
${_hcc_options} | |||||
NVCC_OPTIONS | |||||
${_nvcc_options}) | |||||
if(_source_files) | |||||
list(REMOVE_ITEM _sources ${_source_files}) | |||||
endif() | |||||
add_custom_target(${_hip_target}) | |||||
add_custom_target(${_hip_target}) | |||||
# set return value | |||||
set(${_hip_objs} ${_generated_files}) | |||||
# set return value | |||||
set(${_hip_objs} ${_generated_files}) | |||||
endmacro() | endmacro() | ||||
if (MGE_WITH_ROCM) | |||||
file (GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||||
list (APPEND SOURCES ${SOURCES_}) | |||||
# FIXME rocm may lost the first hip file, so currently we just create an | |||||
# empty file to bypass this error. | |||||
file(GLOB start.cpp.hip "" ) | |||||
list(APPEND HIP_SOURCES start.cpp.hip) | |||||
configure_file( | |||||
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||||
configure_file( | |||||
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||||
file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||||
set(HIP_TARGET_NAME megdnn_hip_kernel) | |||||
set(_HIPCC_OPTIONS "-fPIC") | |||||
set(_HCC_OPTIONS "-fPIC") | |||||
set(_NVCC_OPTIONS "-fPIC") | |||||
list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||||
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||||
HIP_INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/dnn | |||||
${PROJECT_SOURCE_DIR}/dnn/include | |||||
${PROJECT_BINARY_DIR}/dnn | |||||
${PROJECT_BINARY_DIR}/genfiles | |||||
${PROJECT_BINARY_DIR}/dnn/include | |||||
${HIP_INCLUDE_DIR} | |||||
${MIOPEN_INCLUDE_DIR} | |||||
${ROCBLAS_INCLUDE_DIR} | |||||
${ROCRAND_INCLUDE_DIR} | |||||
${AMDOCL_INCLUDE_DIR}) | |||||
hip_compile( | |||||
${HIP_TARGET_NAME} HIPOBJS ${HIP_SOURCES} | |||||
HIPCC_OPTIONS ${_HIPCC_OPTIONS} | |||||
HCC_OPTIONS ${_HCC_OPTIONS} | |||||
NVCC_OPTIONS ${_NVCC_OPTIONS}) | |||||
list(APPEND SOURCES ${HIPOBJS}) | |||||
endif () | |||||
if(MGE_WITH_ROCM) | |||||
file(GLOB_RECURSE SOURCES_ rocm/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
# FIXME: ROCm may lose the first HIP file, so for now we create an empty file to | |||||
# work around this error. | |||||
file(GLOB start.cpp.hip "") | |||||
list(APPEND HIP_SOURCES start.cpp.hip) | |||||
configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in | |||||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h) | |||||
configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in | |||||
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h) | |||||
file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip) | |||||
set(HIP_TARGET_NAME megdnn_hip_kernel) | |||||
set(_HIPCC_OPTIONS "-fPIC") | |||||
set(_HCC_OPTIONS "-fPIC") | |||||
set(_NVCC_OPTIONS "-fPIC") | |||||
list(APPEND HIP_SOURCES ${HIP_SOURCES_}) | |||||
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | |||||
hip_include_directories( | |||||
${PROJECT_SOURCE_DIR}/dnn | |||||
${PROJECT_SOURCE_DIR}/dnn/include | |||||
${PROJECT_BINARY_DIR}/dnn | |||||
${PROJECT_BINARY_DIR}/genfiles | |||||
${PROJECT_BINARY_DIR}/dnn/include | |||||
${HIP_INCLUDE_DIR} | |||||
${MIOPEN_INCLUDE_DIR} | |||||
${ROCBLAS_INCLUDE_DIR} | |||||
${ROCRAND_INCLUDE_DIR} | |||||
${AMDOCL_INCLUDE_DIR}) | |||||
hip_compile( | |||||
${HIP_TARGET_NAME} | |||||
HIPOBJS | |||||
${HIP_SOURCES} | |||||
HIPCC_OPTIONS | |||||
${_HIPCC_OPTIONS} | |||||
HCC_OPTIONS | |||||
${_HCC_OPTIONS} | |||||
NVCC_OPTIONS | |||||
${_NVCC_OPTIONS}) | |||||
list(APPEND SOURCES ${HIPOBJS}) | |||||
endif() | |||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE SOURCES_ cuda/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||||
set(CUTLASS_GEN_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||||
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||||
set(CUTLASS_SOURCES "") | |||||
function(gen_cutlass_kimpl op type gen_files) | |||||
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||||
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||||
set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||||
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||||
execute_process( | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations ${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||||
RESULT_VARIABLE gen_cutlass_result | |||||
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
) | |||||
if (NOT gen_cutlass_result EQUAL 0) | |||||
message(FATAL_ERROR "Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log") | |||||
endif() | |||||
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
if (NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||||
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||||
endif() | |||||
endforeach() | |||||
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" "${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||||
endforeach() | |||||
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||||
set(${gen_files} "${${gen_files}}" PARENT_SCOPE) | |||||
endfunction() | |||||
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||||
list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||||
list(APPEND SOURCES ${CUSOURCES}) | |||||
file(GLOB_RECURSE CUSOURCES cuda/*.cu) | |||||
set(CUTLASS_GEN_SCRIPT | |||||
${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py) | |||||
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated) | |||||
set(CUTLASS_SOURCES "") | |||||
# gen_cutlass_kimpl(<op> <type> <gen_files>)
#
# Runs the cutlass generator script at configure time for one (op, type) pair
# and appends the resulting .cu files to the list variable named by <gen_files>
# in the caller's scope.
#
# op        - value passed to the generator's --operations option
# type      - value passed to the generator's --type option
# gen_files - NAME of the caller's list variable to extend
#
# Output is first generated into a temporary "<op>_<type>.stage" directory and
# then synced into "<op>_<type>" with copy_if_different.
function(gen_cutlass_kimpl op type gen_files) | |||||
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage) | |||||
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type}) | |||||
# register the generator script as a configure-time dependency
set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT}) | |||||
# start from an empty stage directory; the final directory is kept across runs
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR}) | |||||
# run the generator into the stage dir; stdout and stderr both go to gen_cutlass.log
execute_process( | |||||
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations | |||||
${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR} | |||||
RESULT_VARIABLE gen_cutlass_result | |||||
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log | |||||
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log) | |||||
# abort configuration when the generator exits with a non-zero status
if(NOT gen_cutlass_result EQUAL 0) | |||||
message( | |||||
FATAL_ERROR | |||||
"Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log" | |||||
) | |||||
endif() | |||||
# remove previously generated .cu files that the new run no longer produces
file( | |||||
GLOB CUTLASS_GEN_FILES | |||||
RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" | |||||
"${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
if(NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}") | |||||
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}") | |||||
endif() | |||||
endforeach() | |||||
# copy freshly staged .cu files into the final directory, only when they differ
file( | |||||
GLOB CUTLASS_GEN_FILES | |||||
RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" | |||||
"${CURRENT_CUTLASS_STAGE_DIR}/*.cu") | |||||
foreach(FILE ${CUTLASS_GEN_FILES}) | |||||
execute_process( | |||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different | |||||
"${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}") | |||||
endforeach() | |||||
# the stage directory is temporary; drop it once the sync is done
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR}) | |||||
# collect the final .cu list and export it through the caller's variable
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu") | |||||
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES}) | |||||
set(${gen_files} | |||||
"${${gen_files}}" | |||||
PARENT_SCOPE) | |||||
endfunction() | |||||
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES) | |||||
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES) | |||||
list(APPEND SOURCES ${CUTLASS_SOURCES}) | |||||
list(APPEND SOURCES ${CUSOURCES}) | |||||
endif() | endif() | ||||
if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||||
file(GLOB_RECURSE SOURCES_ atlas/*.cpp) | |||||
list(APPEND SOURCES ${SOURCES_}) | |||||
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1) | |||||
endif() | endif() | ||||
add_definitions(${LIBMEGDNN_DEF}) | add_definitions(${LIBMEGDNN_DEF}) | ||||
@@ -174,81 +196,85 @@ add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES}) | |||||
target_link_libraries(megdnn PUBLIC opr_param_defs) | target_link_libraries(megdnn PUBLIC opr_param_defs) | ||||
if(MGE_WITH_CUDA) | if(MGE_WITH_CUDA) | ||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||||
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>) | |||||
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR}) | |||||
endif() | endif() | ||||
if(MGE_WITH_ROCM) | if(MGE_WITH_ROCM) | ||||
target_include_directories(megdnn PUBLIC | |||||
${HIP_INCLUDE_DIR} | |||||
${MIOPEN_INCLUDE_DIR} | |||||
${ROCBLAS_INCLUDE_DIR} | |||||
${ROCRAND_INCLUDE_DIR} | |||||
${AMDOCL_INCLUDE_DIR}) | |||||
target_link_directories(megdnn PUBLIC | |||||
${HIP_LIBRARY_DIR} | |||||
${MIOPEN_LIBRARY_DIR} | |||||
${ROCBLAS_LIBRARY_DIR} | |||||
${ROCRAND_LIBRARY_DIR} | |||||
${AMDOCL_LIBRARY_DIR}) | |||||
target_include_directories( | |||||
megdnn PUBLIC ${HIP_INCLUDE_DIR} ${MIOPEN_INCLUDE_DIR} ${ROCBLAS_INCLUDE_DIR} | |||||
${ROCRAND_INCLUDE_DIR} ${AMDOCL_INCLUDE_DIR}) | |||||
target_link_directories( | |||||
megdnn | |||||
PUBLIC | |||||
${HIP_LIBRARY_DIR} | |||||
${MIOPEN_LIBRARY_DIR} | |||||
${ROCBLAS_LIBRARY_DIR} | |||||
${ROCRAND_LIBRARY_DIR} | |||||
${AMDOCL_LIBRARY_DIR}) | |||||
endif() | endif() | ||||
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") | |||||
if(MGE_ENABLE_CPUINFO) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||||
endif() | |||||
if(${MGE_ARCH} STREQUAL "x86_64" | |||||
OR ${MGE_ARCH} STREQUAL "i386" | |||||
OR ${MGE_ARCH} STREQUAL "armv7" | |||||
OR ${MGE_ARCH} STREQUAL "aarch64") | |||||
if(MGE_ENABLE_CPUINFO) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>) | |||||
endif() | |||||
endif() | endif() | ||||
target_include_directories(megdnn | |||||
PUBLIC | |||||
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
PRIVATE | |||||
${PROJECT_SOURCE_DIR}/dnn | |||||
${PROJECT_SOURCE_DIR}/third_party/midout/src | |||||
) | |||||
target_include_directories( | |||||
megdnn | |||||
PUBLIC $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles> | |||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include> | |||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> | |||||
PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src) | |||||
install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*") | |||||
install( | |||||
DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include | |||||
DESTINATION . | |||||
FILES_MATCHING | |||||
PATTERN "*.h*") | |||||
if(CXX_SUPPORT_WCLASS_MEMACCESS) | if(CXX_SUPPORT_WCLASS_MEMACCESS) | ||||
if(MGE_WITH_CUDA) | |||||
target_compile_options(megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||||
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||||
else() | |||||
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||||
endif() | |||||
if(MGE_WITH_CUDA) | |||||
target_compile_options( | |||||
megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>" | |||||
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>") | |||||
else() | |||||
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess") | |||||
endif() | |||||
endif() | endif() | ||||
target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF}) | target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF}) | ||||
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64") | ||||
if (BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||||
else() | |||||
target_link_libraries(megdnn PRIVATE dnnl) | |||||
endif() | |||||
if(BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>) | |||||
else() | |||||
target_link_libraries(megdnn PRIVATE dnnl) | |||||
endif() | |||||
endif() | endif() | ||||
if (BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||||
if(BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>) | |||||
else() | else() | ||||
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||||
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS}) | |||||
endif() | endif() | ||||
if (MGE_WITH_ROCM) | |||||
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||||
endif () | |||||
if(MGE_WITH_ROCM) | |||||
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS}) | |||||
endif() | |||||
if(MGE_WITH_ATLAS) | if(MGE_WITH_ATLAS) | ||||
if (BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||||
else() | |||||
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||||
endif() | |||||
if(BUILD_SHARED_LIBS) | |||||
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>) | |||||
else() | |||||
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS}) | |||||
endif() | |||||
endif() | endif() | ||||
if(CMAKE_THREAD_LIBS_INIT) | if(CMAKE_THREAD_LIBS_INIT) | ||||
target_link_libraries(megdnn PRIVATE Threads::Threads) | |||||
target_link_libraries(megdnn PRIVATE Threads::Threads) | |||||
endif() | endif() | ||||
install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS}) | install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS}) |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
INSTANTIATION_CONV_S1(2); | |||||
INSTANTIATION_CONV_S1_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_NO_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
INSTANTIATION_CONV_S2(5); | |||||
INSTANTIATION_CONV_S2_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_NO_BIAS(2); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
INSTANTIATION_CONV_S1(5); | |||||
INSTANTIATION_CONV_S1_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_NO_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
INSTANTIATION_CONV_S2(2); | |||||
INSTANTIATION_CONV_S2_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_NO_BIAS(3); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
INSTANTIATION_CONV_S1(3); | |||||
INSTANTIATION_CONV_S1_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_NO_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
INSTANTIATION_CONV_S2(7); | |||||
INSTANTIATION_CONV_S2_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_NO_BIAS(5); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | ||||
INSTANTIATION_CONV_S1(7); | |||||
INSTANTIATION_CONV_S1_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h" | |||||
INSTANTIATION_CONV_S1_NO_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | ||||
INSTANTIATION_CONV_S2(3); | |||||
INSTANTIATION_CONV_S2_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h" | |||||
INSTANTIATION_CONV_S2_NO_BIAS(7); | |||||
// vim: syntax=cpp.doxygen |
@@ -469,9 +469,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||||
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | ||||
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | ||||
#define INSTANTIATION_CONV_S1(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
FOR_OP(filter_size, BiasMode::BIAS) | |||||
#define INSTANTIATION_CONV_S1_NO_BIAS(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::NO_BIAS) | |||||
// vim: syntax=cpp.doxygen | |||||
#define INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
#define INSTANTIATION_CONV_S1_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||||
// vim: syntax=cpp.doxygen |
@@ -550,9 +550,12 @@ void conv_bias::conv_direct_fp32_nchw44( | |||||
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \ | ||||
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>) | ||||
#define INSTANTIATION_CONV_S2(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::NO_BIAS) \ | |||||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
FOR_OP(filter_size, BiasMode::BIAS) | |||||
#define INSTANTIATION_CONV_S2_NO_BIAS(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::NO_BIAS) | |||||
// vim: syntax=cpp.doxygen | |||||
#define INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(filter_size) \ | |||||
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
#define INSTANTIATION_CONV_S2_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS) | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
INSTANCE_CONV(2, 1); | |||||
INSTANCE_CONV_BIAS(2, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_NO_BIAS(2, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
INSTANCE_CONV(2, 2); | |||||
INSTANCE_CONV_BIAS(2, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_NO_BIAS(2, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
INSTANCE_CONV(3, 1); | |||||
INSTANCE_CONV_BIAS(3, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_NO_BIAS(3, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2.cpp | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -11,4 +11,5 @@ | |||||
* implied. | * implied. | ||||
*/ | */ | ||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | #include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | ||||
INSTANCE_CONV(3, 2); | |||||
INSTANCE_CONV_BIAS(3, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_NO_BIAS(3, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BIAS(5, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_NO_BIAS(5, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
INSTANCE_CONV_BIAS(5, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 5x5, stride-2 kernels
// for BiasMode::BROADCAST_CHANNEL_BIAS only.
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 5x5, stride-2 kernels
// for BiasMode::NO_BIAS only.
INSTANCE_CONV_NO_BIAS(5, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-1 kernels
// for BiasMode::BIAS only.
INSTANCE_CONV_BIAS(7, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-1 kernels
// for BiasMode::BROADCAST_CHANNEL_BIAS only.
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-1 kernels
// for BiasMode::NO_BIAS only.
INSTANCE_CONV_NO_BIAS(7, 1); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-2 kernels
// for BiasMode::BIAS only.
INSTANCE_CONV_BIAS(7, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-2 kernels
// for BiasMode::BROADCAST_CHANNEL_BIAS only.
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,15 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h" | |||||
// Macro arguments are (filter, stride): instantiate the 7x7, stride-2 kernels
// for BiasMode::NO_BIAS only.
INSTANCE_CONV_NO_BIAS(7, 2); | |||||
// vim: syntax=cpp.doxygen |
@@ -928,9 +928,11 @@ void fp32_direct_nchw_nchw44::conv_direct_fp32_nchw_nchw44( | |||||
INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \ | INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \ | ||||
INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>) | INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>) | ||||
// Expands FOR_OP for all three bias modes of one (filter, stride) pair.
// Note the argument swap: this macro takes (filter, stride) while FOR_OP
// takes (stride, filter, bias_mode).
#define INSTANCE_CONV(filter, stride) \ | |||||
FOR_OP(stride, filter, BiasMode::NO_BIAS) \ | |||||
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) \ | |||||
FOR_OP(stride, filter, BiasMode::BIAS) | |||||
// Per-bias-mode variants: each expands FOR_OP for exactly one BiasMode, so a
// translation unit can instantiate a single (filter, stride, bias mode) slice.
#define INSTANCE_CONV_NO_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::NO_BIAS) | |||||
#define INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(filter, stride) \ | |||||
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
#define INSTANCE_CONV_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::BIAS) | |||||
// vim: syntax=cpp.doxygen | // vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.cpp | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -265,7 +265,8 @@ void conv_direct_sdot_int8_nchw44( | |||||
// Explicit instantiation of conv_direct_sdot_int8_nchw44 for one combination
// of (dst_type, stride, filter_size, bias_mode, Op). The template argument
// order <dst_type, stride, bias_mode, Op, filter_size> differs from the macro
// parameter order. This hunk shows both the unqualified name and the fully
// qualified megdnn::arm_common::direct_dotprod_nchw44:: spelling.
#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | ||||
template void \ | template void \ | ||||
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||||
dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
dst_type * dst, const int oh, const int ow, const int8_t* src, \ | dst_type * dst, const int oh, const int ow, const int8_t* src, \ | ||||
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | ||||
const int oh_size, const int oc, const int ic, const Op& op); | const int oh_size, const int oc, const int ic, const Op& op); | ||||
@@ -284,22 +285,6 @@ void conv_direct_sdot_int8_nchw44( | |||||
FOR_OP(stride, i, BiasMode::NO_BIAS) \ | FOR_OP(stride, i, BiasMode::NO_BIAS) \ | ||||
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
// Expands FOR_BIAS for every supported filter size (2, 3, 5, 7) at one stride.
#define FOR_FILTER(stride) \ | |||||
FOR_BIAS(stride, 2) \ | |||||
FOR_BIAS(stride, 3) \ | |||||
FOR_BIAS(stride, 5) \ | |||||
FOR_BIAS(stride, 7) | |||||
// Instantiate all filter sizes for stride 1.
FOR_FILTER(1) | |||||
// Drop the helper macros so they do not leak past this point.
#undef FOR_STRIDE | |||||
#undef FOR_FILTER | |||||
#undef FOR_IC | |||||
#undef FOR_BIAS | |||||
#undef FOR_NONLINEAR | |||||
#undef FOR_REMAIN | |||||
#undef INSTANTIATION | |||||
} // namespace direct_dotprod_nchw44 | } // namespace direct_dotprod_nchw44 | ||||
} // namespace arm_common | } // namespace arm_common | ||||
} // namespace megdnn | } // namespace megdnn |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 1 with the 2x2 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s1.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(1, 2); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 1 with the 3x3 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s1.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(1, 3); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 1 with the 5x5 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s1.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(1, 5); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 1 with the 7x7 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s1.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(1, 7); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.cpp | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -266,7 +266,8 @@ void conv_direct_sdot_int8_nchw44( | |||||
// Explicit instantiation of conv_direct_sdot_int8_nchw44 for one combination
// of (dst_type, stride, filter_size, bias_mode, Op). The template argument
// order <dst_type, stride, bias_mode, Op, filter_size> differs from the macro
// parameter order. This hunk shows both the unqualified name and the fully
// qualified megdnn::arm_common::direct_dotprod_nchw44:: spelling.
#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | #define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \ | ||||
template void \ | template void \ | ||||
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \ | |||||
dst_type, stride, bias_mode, Op, filter_size>( \ | |||||
dst_type * dst, const int oh, const int ow, const int8_t* src, \ | dst_type * dst, const int oh, const int ow, const int8_t* src, \ | ||||
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | const int ih, const int iw, const int8_t* weight, const int32_t* bias, \ | ||||
const int oh_size, const int oc, const int ic, const Op& op); | const int oh_size, const int oc, const int ic, const Op& op); | ||||
@@ -285,22 +286,6 @@ void conv_direct_sdot_int8_nchw44( | |||||
FOR_OP(stride, i, BiasMode::NO_BIAS) \ | FOR_OP(stride, i, BiasMode::NO_BIAS) \ | ||||
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
// Expands FOR_BIAS for every supported filter size (2, 3, 5, 7) at one stride.
#define FOR_FILTER(stride) \ | |||||
FOR_BIAS(stride, 2) \ | |||||
FOR_BIAS(stride, 3) \ | |||||
FOR_BIAS(stride, 5) \ | |||||
FOR_BIAS(stride, 7) | |||||
// Instantiate all filter sizes for stride 2.
FOR_FILTER(2) | |||||
// Drop the helper macros so they do not leak past this point.
#undef FOR_STRIDE | |||||
#undef FOR_FILTER | |||||
#undef FOR_IC | |||||
#undef FOR_BIAS | |||||
#undef FOR_NONLINEAR | |||||
#undef FOR_REMAIN | |||||
#undef INSTANTIATION | |||||
} // namespace direct_dotprod_nchw44 | } // namespace direct_dotprod_nchw44 | ||||
} // namespace arm_common | } // namespace arm_common | ||||
} // namespace megdnn | } // namespace megdnn |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 2 with the 2x2 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s2.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(2, 2); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 2 with the 3x3 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s2.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(2, 3); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 2 with the 5x5 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s2.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(2, 5); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,21 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h" | |||||
// Compiled only when MGB_ENABLE_DOT is set. FOR_BIAS takes (stride, filter
// size): this unit covers stride 2 with the 7x7 filter.
// NOTE(review): FOR_BIAS presumably expands to explicit instantiations for the
// NO_BIAS and BROADCAST_CHANNEL_BIAS modes — confirm against dot_direct_nchw44_s2.h.
#if MGB_ENABLE_DOT | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
FOR_BIAS(2, 7); | |||||
#endif | |||||
// vim: syntax=cpp.doxygen |
@@ -1,6 +1,6 @@ | |||||
/** | /** | ||||
* \file | * \file | ||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | ||||
* | * | ||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | ||||
@@ -45,4 +45,4 @@ public: | |||||
} // namespace arm_common | } // namespace arm_common | ||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | |||||
// vim: syntax=cpp.doxygen |
@@ -13,336 +13,9 @@ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | #include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | ||||
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | #include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | ||||
namespace megdnn { | namespace megdnn { | ||||
namespace arm_common { | namespace arm_common { | ||||
namespace { | |||||
/** | |||||
* @brief Primary template of the unrolled multiply-accumulate pattern. It only | |||||
* declares the two impl() overloads; the bodies are supplied by partial | |||||
* specializations on (c_dim, stride). | |||||
* | |||||
* @tparam src_idx offset added to the src register index | |||||
* @tparam weight_idx index of the weight register to use | |||||
* @tparam c_dim number of accumulator rows in c (output-channel blocks) | |||||
* @tparam stride convolution stride the specialization is written for | |||||
* @tparam T output regs type | |||||
* @tparam T2 src regs type | |||||
* @tparam T3 weight regs type | |||||
* @tparam T4 temp regs type | |||||
*/ | |||||
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3, typename T4> | |||||
struct ShiftCalHelper { | |||||
// Overload that receives scratch registers in temp.
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||||
// Overload without scratch registers.
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||||
}; | |||||
// Convenience wrapper: deduces the register types and forwards to the
// four-argument ShiftCalHelper<...>::impl (the overload taking temp).
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3, typename T4> | |||||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||||
c, src, weight, temp); | |||||
} | |||||
// Overload without scratch registers: forwards to the three-argument
// ShiftCalHelper<...>::impl, with int filling the unused T4 slot.
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3> | |||||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||||
c, src, weight); | |||||
}; | |||||
// Specialization for c_dim == 2, stride == 1. Updates two accumulator rows
// (c[0], c[1]) across 8 columns: column i reads src[(i + src_idx) % 8] and
// row r uses weight[r][weight_idx]. Scratch registers alternate by column:
// even columns use temp[0] / temp[1], odd columns use temp[2] / temp[3].
// NOTE(review): vdotq_s32_h is defined elsewhere; presumably a dot-product
// accumulate that uses its last argument as scratch — confirm.
template < | |||||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
c[0][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
c[1][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||||
c[0][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||||
c[1][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||||
c[0][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||||
c[1][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||||
c[0][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||||
c[1][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||||
c[0][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
c[1][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||||
c[0][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||||
c[1][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||||
c[0][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||||
c[1][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||||
c[0][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||||
c[1][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||||
} | |||||
// Declared only; no definition of the temp-less overload appears in this block.
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
}; | |||||
// Specialization for c_dim == 1, stride == 1. Updates the single accumulator
// row c[0] across 8 columns: column i reads src[(i + src_idx) % 8], uses
// weight[0][weight_idx], and takes temp[i % 4] as its scratch register.
// NOTE(review): vdotq_s32_h is defined elsewhere; presumably a dot-product
// accumulate that uses its last argument as scratch — confirm.
template < | |||||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
c[0][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
c[0][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||||
c[0][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||||
c[0][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||||
c[0][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
c[0][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||||
c[0][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||||
c[0][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||||
} | |||||
// Declared only; no definition of the temp-less overload appears in this block.
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
}; | |||||
// Kernel specialization whose 4th template argument is 1 (matches
// filter_height below) and whose last argument is 1 (matches stride below).
// impl() accumulates one output tile of c_dim x 8 int32x4 vectors over all
// input channels and stores it through op.
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 1; | |||||
// NOTE(review): filter_width is 4 although the filter is 1 wide; presumably
// the packed weights are padded to a multiple of 4 — confirm.
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
// Element distance between consecutive input channels in the packed source.
const int ic_stride = ih * iw * pack_iw_len; | |||||
// Element distance between consecutive output-channel blocks of the weights.
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
// Accumulators: c_dim rows of 8 vectors each.
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
// One input channel per iteration.
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
// Filter row 0: load weights and 8 source vectors, then accumulate.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Advance to the weights of the next input channel.
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
// Kernel specialization whose 4th template argument is 2 (matches
// filter_height below) and whose last argument is 1 (matches stride below).
// impl() accumulates one output tile of c_dim x 8 int32x4 vectors over all
// input channels and both filter rows, then stores it through op.
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 2; | |||||
// NOTE(review): filter_width is 4 although the filter is 2 wide; presumably
// the packed weights are padded to a multiple of 4 — confirm.
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
// Element distance between consecutive input channels in the packed source.
const int ic_stride = ih * iw * pack_iw_len; | |||||
// Element distance between consecutive output-channel blocks of the weights.
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
// Accumulators: c_dim rows of 8 vectors each.
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
// One input channel per iteration.
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
// Filter row 0.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Filter row 1: weights advance by one packed row, source by one input row.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Advance to the weights of the next input channel.
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
// Kernel specialization whose 4th template argument is 3 (matches
// filter_height below) and whose last argument is 1 (matches stride below).
// impl() accumulates one output tile of c_dim x 8 int32x4 vectors over all
// input channels and the three filter rows, then stores it through op.
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 3; | |||||
// NOTE(review): filter_width is 4 although the filter is 3 wide; presumably
// the packed weights are padded to a multiple of 4 — confirm.
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
// Element distance between consecutive input channels in the packed source.
const int ic_stride = ih * iw * pack_iw_len; | |||||
// Element distance between consecutive output-channel blocks of the weights.
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
// Accumulators: c_dim rows of 8 vectors each.
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
// One input channel per iteration.
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
// Filter row 0.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Filter row 1.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Filter row 2.
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
// Advance to the weights of the next input channel.
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
// Kernel specialization whose 4th template argument is 5 (matches
// filter_height below) and whose last argument is 1 (matches stride below).
// Unlike the smaller filters it uses two weight registers per row
// (weight_reg = 2, filter_width = 8) and two accumulate passes per row.
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 5; | |||||
constexpr int filter_width = 8; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 2; | |||||
// Element distance between consecutive input channels in the packed source.
const int ic_stride = ih * iw * pack_iw_len; | |||||
// Element distance between consecutive output-channel blocks of the weights.
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
// Accumulators: c_dim rows of 8 vectors each.
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
// One input channel per iteration.
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
// cb(step) handles filter row `step` in two passes: the first uses weight
// register 0 on src[0..7]; the second loads 4 more source vectors starting
// src_reg * pack_iw_len elements further along the row and uses weight
// register 1 with a source index shift of 4.
// NOTE(review): the 4-vector load presumably overwrites src[0..3], so the
// shifted pass sees input columns 4..11 — confirm against load_helper.
#define cb(step) \ | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
UNROLL_CALL_RAW(5, cb); | |||||
#undef cb | |||||
// Advance to the weights of the next input channel.
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
// Kernel specialization whose 4th template argument is 7 (matches
// filter_height below) and whose last argument is 1 (matches stride below).
// Like the 5-row variant it uses two weight registers per row
// (weight_reg = 2, filter_width = 8) and two accumulate passes per row.
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 7; | |||||
constexpr int filter_width = 8; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 2; | |||||
// Element distance between consecutive input channels in the packed source.
const int ic_stride = ih * iw * pack_iw_len; | |||||
// Element distance between consecutive output-channel blocks of the weights.
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
// Accumulators: c_dim rows of 8 vectors each.
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
// One input channel per iteration.
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
// cb(step) handles filter row `step` in two passes: the first uses weight
// register 0 on src[0..7]; the second loads 4 more source vectors starting
// src_reg * pack_iw_len elements further along the row and uses weight
// register 1 with a source index shift of 4.
// NOTE(review): the 4-vector load presumably overwrites src[0..3], so the
// shifted pass sees input columns 4..11 — confirm against load_helper.
#define cb(step) \ | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
UNROLL_CALL_RAW(7, cb); | |||||
#undef cb | |||||
// Advance to the weights of the next input channel.
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
} // namespace | |||||
namespace int8_direct_nchw_nchw44 { | namespace int8_direct_nchw_nchw44 { | ||||
/** | /** | ||||
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | * pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | ||||
@@ -444,115 +117,9 @@ void pack_nchw_src_for_nchw44_conv<1>( | |||||
} | } | ||||
} | } | ||||
// Stride-1 driver: tiles the output into blocks of 8 output channels
// (big_oc_step) by 8 output columns (ow_step) and calls the matching
// KerNeonXXs2NchwNchw44 micro-kernel for each tile. Leftover columns
// (ow_remain) and leftover channels (oc_remain) go through kernels picked
// below. `temp` is not used by this specialization.
// NOTE(review): this copy sits on the removed side of the diff; the same
// definition appears again in the newly added header further down.
template <BiasMode bias_mode, typename Op, size_t filter_size> | |||||
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
static void impl( | |||||
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||||
int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||||
const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||||
MEGDNN_MARK_USED_VAR(temp); | |||||
constexpr int stride = 1; | |||||
constexpr size_t fh = filter_size; | |||||
// Filter width rounded up to a multiple of 4.
constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||||
constexpr size_t ic_step = 1; | |||||
constexpr size_t big_oc_step = 8; | |||||
constexpr size_t oc_step = 4; | |||||
constexpr size_t ih_step = 1; | |||||
constexpr size_t oh_step = 1; | |||||
constexpr size_t ow_step = 8; | |||||
constexpr size_t stride_h = stride; | |||||
constexpr size_t stride_w = stride; | |||||
constexpr int pack_iw_len = 16; | |||||
const size_t img_stride = oh * ow; | |||||
// Split ow into full 8-column tiles plus a remainder in [0, 7].
const size_t ow_end = ow / ow_step * ow_step; | |||||
const size_t ow_remain = ow - ow_end; | |||||
// Split oc into full 8-channel blocks plus a remainder.
const size_t oc_end = oc / big_oc_step * big_oc_step; | |||||
const size_t oc_remain = oc - oc_end; | |||||
const int ld_dst_oc = oc_step * img_stride; | |||||
using remain_fun = std::function<void( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, | |||||
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||||
int ld_dst_oc, const Op& op)>; | |||||
remain_fun kern_big_oc_remain = nullptr; | |||||
remain_fun kern_small_oc_remain = nullptr; | |||||
// Pick the remainder kernels whose remain_w template argument equals ow_remain.
switch (ow_remain) { | |||||
#define cb(step) \ | |||||
case step: \ | |||||
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||||
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||||
break; | |||||
UNROLL_CALL_RAW(8, cb); | |||||
default: | |||||
megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||||
} | |||||
// Full 8-channel output blocks.
for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
KerNeonXXs2NchwNchw44< | |||||
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||||
impl(src + src_offset, filter + weight_offset, | |||||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
op); | |||||
} | |||||
// Trailing columns of this output row.
if (ow_remain > 0) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
kern_big_oc_remain( | |||||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
} | |||||
} | |||||
} | |||||
// Leftover output channels: a single pass with the oc_step (4-channel) kernels.
// NOTE(review): presumably oc is a multiple of 4 so the remainder is exactly 4 - confirm.
if (oc_remain > 0) { | |||||
size_t oc_idx = oc_end; | |||||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
KerNeonXXs2NchwNchw44< | |||||
bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||||
impl(src + src_offset, filter + weight_offset, | |||||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
op); | |||||
} | |||||
if (ow_remain > 0) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
kern_small_oc_remain( | |||||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
}; | |||||
#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | #define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | ||||
template struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||||
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \ | |||||
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||||
#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | #define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | ||||
INSTANCE_CONV_KERN_FUN( \ | INSTANCE_CONV_KERN_FUN( \ | ||||
@@ -566,17 +133,10 @@ struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | ||||
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | ||||
#define INSTANCE_CONV_KERN(stride) \ | |||||
INSTANCE_BIAS_MODE_PARAM(stride, 1) \ | |||||
INSTANCE_BIAS_MODE_PARAM(stride, 2) \ | |||||
INSTANCE_BIAS_MODE_PARAM(stride, 3) \ | |||||
INSTANCE_BIAS_MODE_PARAM(stride, 5) \ | |||||
INSTANCE_BIAS_MODE_PARAM(stride, 7) | |||||
INSTANCE_CONV_KERN(1); | |||||
#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter) | |||||
} // namespace int8_direct_nchw_nchw44 | } // namespace int8_direct_nchw_nchw44 | ||||
} // namespace arm_common | } // namespace arm_common | ||||
} // namespace megdnn | } // namespace megdnn | ||||
// vim: syntax=cpp.doxygen | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,481 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h" | |||||
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h" | |||||
namespace megdnn { | |||||
namespace arm_common { | |||||
namespace { | |||||
/**
 * @brief Core code for the calculation pattern of the stride-1 kernels.
 *
 * The primary template only declares the two impl() overloads; the partial
 * specializations in this file define the four-operand one.
 *
 * @tparam src_idx offset added to the src register index (the specializations
 * take the sum modulo 8)
 * @tparam weight_idx index of the weight register used for every accumulator row
 * @tparam c_dim number of accumulator rows (output-channel blocks)
 * @tparam stride convolution stride the specialization is written for
 * @tparam T output regs type
 * @tparam T2 src regs type
 * @tparam T3 weight regs type
 * @tparam T4 temp regs type
 */ | |||||
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3, typename T4> | |||||
struct ShiftCalHelper { | |||||
// Accumulate src * weight into c, using temp as scratch registers.
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp); | |||||
// Variant without scratch registers.
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight); | |||||
}; | |||||
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3, typename T4> | |||||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) { | |||||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl( | |||||
c, src, weight, temp); | |||||
} | |||||
template < | |||||
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2, | |||||
typename T3> | |||||
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) { | |||||
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl( | |||||
c, src, weight); | |||||
}; | |||||
// Specialization for c_dim == 2, stride == 1: updates two accumulator rows
// (c[0][*] and c[1][*]) for 8 output columns. Column j reads
// src[(j + src_idx) % 8] and, for row r, weight[r][weight_idx].
// The scratch registers rotate: even columns use temp[0]/temp[1], odd columns
// use temp[2]/temp[3].
// NOTE(review): vdotq_s32_h is defined outside this view; presumably a
// dot-product accumulate that uses its last argument as 16-bit scratch - confirm.
template < | |||||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> { | |||||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
c[0][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
c[1][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]); | |||||
c[0][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]); | |||||
c[1][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]); | |||||
c[0][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]); | |||||
c[1][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]); | |||||
c[0][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]); | |||||
c[1][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]); | |||||
c[0][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
c[1][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]); | |||||
c[0][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]); | |||||
c[1][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]); | |||||
c[0][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]); | |||||
c[1][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]); | |||||
c[0][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]); | |||||
c[1][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]); | |||||
} | |||||
// Three-operand overload: declared only, no definition in this specialization.
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
}; | |||||
// Specialization for c_dim == 1, stride == 1: updates the single accumulator
// row c[0][*] for 8 output columns. Column j reads src[(j + src_idx) % 8] and
// weight[0][weight_idx]; the scratch register used is temp[j % 4].
// NOTE(review): vdotq_s32_h is defined outside this view; presumably a
// dot-product accumulate that uses its last argument as 16-bit scratch - confirm.
template < | |||||
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4> | |||||
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> { | |||||
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) { | |||||
c[0][0] = vdotq_s32_h( | |||||
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]); | |||||
c[0][1] = vdotq_s32_h( | |||||
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]); | |||||
c[0][2] = vdotq_s32_h( | |||||
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]); | |||||
c[0][3] = vdotq_s32_h( | |||||
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]); | |||||
c[0][4] = vdotq_s32_h( | |||||
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]); | |||||
c[0][5] = vdotq_s32_h( | |||||
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]); | |||||
c[0][6] = vdotq_s32_h( | |||||
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]); | |||||
c[0][7] = vdotq_s32_h( | |||||
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]); | |||||
} | |||||
// Three-operand overload: declared only, no definition in this specialization.
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&); | |||||
}; | |||||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 1; | |||||
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
const int ic_stride = ih * iw * pack_iw_len; | |||||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 2; | |||||
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
const int ic_stride = ih * iw * pack_iw_len; | |||||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 3; | |||||
constexpr int filter_width = 4; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 1; | |||||
const int ic_stride = ih * iw * pack_iw_len; | |||||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( | |||||
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc); | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( | |||||
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0); | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 5; | |||||
constexpr int filter_width = 8; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 2; | |||||
const int ic_stride = ih * iw * pack_iw_len; | |||||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
#define cb(step) \ | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
UNROLL_CALL_RAW(5, cb); | |||||
#undef cb | |||||
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
template <BiasMode bias_mode, typename Op, int remain_w, int oc_block> | |||||
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> { | |||||
static void impl( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr, | |||||
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) { | |||||
constexpr int stride = 1; | |||||
constexpr int filter_height = 7; | |||||
constexpr int filter_width = 8; | |||||
constexpr int oc_step = 4; | |||||
constexpr int loop_ic_step = 1; | |||||
constexpr int simd_len = 16; | |||||
constexpr int pack_iw_len = 16; | |||||
constexpr int src_reg = 8; | |||||
constexpr int weight_reg = 2; | |||||
const int ic_stride = ih * iw * pack_iw_len; | |||||
const int ld_weight_oc = oc_step * filter_height * filter_width * ic; | |||||
constexpr int c_dim = OCHelper<oc_block>::val; | |||||
int32x4_t c[c_dim][8]; | |||||
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step); | |||||
for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) { | |||||
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride; | |||||
int8x16_t src[src_reg]; | |||||
int8x16_t dot4_weight[c_dim][weight_reg]; | |||||
int16x8_t temp_c[4]; | |||||
#define cb(step) \ | |||||
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \ | |||||
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \ | |||||
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \ | |||||
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \ | |||||
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \ | |||||
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \ | |||||
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c); | |||||
UNROLL_CALL_RAW(7, cb); | |||||
#undef cb | |||||
weight_ptr += oc_step * filter_height * filter_width; | |||||
} | |||||
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>( | |||||
c, op, dst_ptr, ld_dst_oc); | |||||
} | |||||
}; | |||||
} // namespace | |||||
namespace int8_direct_nchw_nchw44 { | |||||
/** | |||||
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh ,fw/4, 4(oc)*4(fw)} | |||||
* pack interleave two adjacent row in filter to one row | |||||
* */ | |||||
template <BiasMode bias_mode, typename Op, size_t filter_size> | |||||
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> { | |||||
static void impl( | |||||
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp, | |||||
int8_t* dst, const size_t oc, const size_t ic, const size_t ih, | |||||
const size_t iw, const size_t oh, const size_t ow, const Op& op) { | |||||
MEGDNN_MARK_USED_VAR(temp); | |||||
constexpr int stride = 1; | |||||
constexpr size_t fh = filter_size; | |||||
constexpr size_t fw = (filter_size + 3) / 4 * 4; | |||||
constexpr size_t ic_step = 1; | |||||
constexpr size_t big_oc_step = 8; | |||||
constexpr size_t oc_step = 4; | |||||
constexpr size_t ih_step = 1; | |||||
constexpr size_t oh_step = 1; | |||||
constexpr size_t ow_step = 8; | |||||
constexpr size_t stride_h = stride; | |||||
constexpr size_t stride_w = stride; | |||||
constexpr int pack_iw_len = 16; | |||||
const size_t img_stride = oh * ow; | |||||
const size_t ow_end = ow / ow_step * ow_step; | |||||
const size_t ow_remain = ow - ow_end; | |||||
const size_t oc_end = oc / big_oc_step * big_oc_step; | |||||
const size_t oc_remain = oc - oc_end; | |||||
const int ld_dst_oc = oc_step * img_stride; | |||||
using remain_fun = std::function<void( | |||||
const int8_t* src_ptr, const int8_t* weight_ptr, | |||||
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw, | |||||
int ld_dst_oc, const Op& op)>; | |||||
remain_fun kern_big_oc_remain = nullptr; | |||||
remain_fun kern_small_oc_remain = nullptr; | |||||
switch (ow_remain) { | |||||
#define cb(step) \ | |||||
case step: \ | |||||
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \ | |||||
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \ | |||||
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \ | |||||
break; | |||||
UNROLL_CALL_RAW(8, cb); | |||||
default: | |||||
megdnn_assert(0, "no remain %zu for kern", ow_remain); | |||||
} | |||||
for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) { | |||||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
KerNeonXXs2NchwNchw44< | |||||
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>:: | |||||
impl(src + src_offset, filter + weight_offset, | |||||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
op); | |||||
} | |||||
if (ow_remain > 0) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
kern_big_oc_remain( | |||||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
} | |||||
} | |||||
} | |||||
if (oc_remain > 0) { | |||||
size_t oc_idx = oc_end; | |||||
const size_t weight_offset = oc_idx * ic * fh * fw; | |||||
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) { | |||||
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step; | |||||
KerNeonXXs2NchwNchw44< | |||||
bias_mode, Op, ow_step, filter_size, oc_step, stride>:: | |||||
impl(src + src_offset, filter + weight_offset, | |||||
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc, | |||||
op); | |||||
} | |||||
if (ow_remain > 0) { | |||||
const size_t src_offset = | |||||
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) * | |||||
ic_step * pack_iw_len; | |||||
const size_t dst_offset = | |||||
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step; | |||||
kern_small_oc_remain( | |||||
src + src_offset, filter + weight_offset, bias + oc_idx, | |||||
dst + dst_offset, ic, ih, iw, ld_dst_oc, op); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
}; | |||||
// Explicitly instantiates one ConvDiectStrideInt8NchwNchw44 specialization.
// The template name is fully qualified, so the macro can be expanded outside
// the megdnn::arm_common::int8_direct_nchw_nchw44 namespaces.
#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \ | |||||
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \ | |||||
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>; | |||||
// Instantiates the kernel for each of the three post-process ops (TypeCvtOp,
// ReluOp, HSwishOp), all parameterized on dt_qint32 and dt_qint8.
#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \ | |||||
INSTANCE_CONV_KERN_FUN( \ | |||||
stride, filter, bias_mode, TypeCvtOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \ | |||||
INSTANCE_CONV_KERN_FUN( \ | |||||
stride, filter, bias_mode, ReluOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \ | |||||
INSTANCE_CONV_KERN_FUN( \ | |||||
stride, filter, bias_mode, HSwishOp<dt_qint32 MEGDNN_COMMA dt_qint8>) | |||||
// Repeats the op instantiations for both bias modes: NO_BIAS and
// BROADCAST_CHANNEL_BIAS.
#define INSTANCE_BIAS_MODE_PARAM(stride, filter) \ | |||||
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \ | |||||
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) | |||||
// Entry point: instantiates every bias-mode / op combination for one
// (stride, filter) pair.
#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter) | |||||
} // namespace int8_direct_nchw_nchw44 | |||||
} // namespace arm_common | |||||
} // namespace megdnn | |||||
// vim: syntax=cpp.doxygen |
@@ -0,0 +1,19 @@ | |||||
/** | |||||
* \file | |||||
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp | |||||
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License") | |||||
* | |||||
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved. | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, | |||||
* software distributed under the License is distributed on an | |||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
* implied. | |||||
*/ | |||||
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h" | |||||
using namespace megdnn; | |||||
using namespace arm_common; | |||||
// Explicitly instantiate the stride-1, filter-size-1 convolution driver for
// both bias modes and all three post-process ops (six instantiations).
INSTANCE_CONV_KERN(1, 1); | |||||
// vim: syntax=cpp.doxygen |