
Merge pull request #431 from MegEngine/try-import

tags/v1.8.0
XindaH (via GitHub), 3 years ago
parent commit bd3c4a0527
100 changed files with 3751 additions and 2443 deletions
1. +2 -0 .github/workflows/ci.yml
2. +1006 -821 CMakeLists.txt
3. +2 -1 ci/cmake.sh
4. +63 -77 cmake/BuildFlatBuffers.cmake
5. +27 -31 cmake/FetchMegBrainVersion.cmake
6. +24 -15 cmake/Halide.cmake
7. +21 -15 cmake/MKL_DNN.cmake
8. +26 -21 cmake/Modules/FindNumPy.cmake
9. +30 -28 cmake/OpenBLAS.cmake
10. +18 -18 cmake/aclrt.cmake
11. +44 -31 cmake/cndev.cmake
12. +36 -27 cmake/cnlight.cmake
13. +35 -27 cmake/cnml.cmake
14. +73 -53 cmake/cnnl.cmake
15. +35 -27 cmake/cnrt.cmake
16. +5 -2 cmake/cpp_redis.cmake
17. +17 -17 cmake/cpuinfo.cmake
18. +53 -43 cmake/cudnn.cmake
19. +41 -21 cmake/flatbuffers.cmake
20. +2 -1 cmake/gflags.cmake
21. +2 -2 cmake/gtest.cmake
22. +109 -62 cmake/llvm-project.cmake
23. +44 -34 cmake/magicmind.cmake
24. +65 -59 cmake/mkl.cmake
25. +64 -56 cmake/protobuf.cmake
26. +56 -45 cmake/rocm.cmake
27. +153 -130 cmake/tensorrt.cmake
28. +19 -13 cmake/zmq.cmake
29. +32 -37 dnn/CMakeLists.txt
30. +4 -2 dnn/atlas-stub/CMakeLists.txt
31. +12 -11 dnn/cuda-stub/CMakeLists.txt
32. +6 -0 dnn/include/megdnn/common.h
33. +113 -0 dnn/include/megdnn/oprs/nn.h
34. +12 -0 dnn/scripts/opr_param_defs.py
35. +221 -195 dnn/src/CMakeLists.txt
36. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp
37. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp
38. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp
39. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp
40. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp
41. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp
42. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp
43. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp
44. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp
45. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp
46. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp
47. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp
48. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp
49. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp
50. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp
51. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp
52. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp
53. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp
54. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp
55. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp
56. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp
57. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp
58. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp
59. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp
60. +8 -5 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h
61. +8 -5 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h
62. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp
63. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp
64. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp
65. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp
66. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp
67. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp
68. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp
69. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp
70. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp
71. +3 -2 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp
72. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp
73. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp
74. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp
75. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp
76. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp
77. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp
78. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp
79. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp
80. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp
81. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp
82. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp
83. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp
84. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp
85. +15 -0 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp
86. +6 -4 dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h
87. +3 -18 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h
88. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp
89. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp
90. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp
91. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp
92. +3 -18 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h
93. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp
94. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp
95. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp
96. +21 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp
97. +2 -2 dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h
98. +5 -445 dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp
99. +481 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h
100. +19 -0 dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp

+2 -0  .github/workflows/ci.yml

@@ -29,6 +29,7 @@ jobs:
uses: actions/checkout@v2
- name: Checkout submodules
run: |
apt update&&apt install ninja-build
./third_party/prepare.sh
./third_party/install-mkl.sh
- name: Build MegEngine
@@ -57,6 +58,7 @@ jobs:
uses: actions/checkout@v2
- name: Checkout submodules
run: |
apt update&&apt install ninja-build
./third_party/prepare.sh
./third_party/install-mkl.sh
- name: Build MegEngine


+1006 -821  CMakeLists.txt
File diff suppressed because it is too large


+2 -1  ci/cmake.sh

@@ -27,7 +27,8 @@ function build() {
-DMGE_WITH_DISTRIBUTED=${DMGE_WITH_DISTRIBUTED} \
-DMGE_WITH_CUDA=${DMGE_WITH_CUDA} \
-DMGE_WITH_TEST=ON \
-DCMAKE_BUILD_TYPE=RelWithDebInfo
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DMGE_WITH_CUSTOM_OP=ON
make -j$(($(nproc) * 2)) -I ${build_dir}
make develop
popd >/dev/null


+63 -77  cmake/BuildFlatBuffers.cmake

@@ -1,59 +1,56 @@
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
# file except in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

# General function to create FlatBuffer build rules for the given list of
# schemas.
# General function to create FlatBuffer build rules for the given list of schemas.
#
# flatbuffers_schemas: A list of flatbuffer schema files to process.
#
# schema_include_dirs: A list of schema file include directories, which will be
# passed to flatc via the -I parameter.
# schema_include_dirs: A list of schema file include directories, which will be passed
# to flatc via the -I parameter.
#
# custom_target_name: The generated files will be added as dependencies for a
# new custom target with this name. You should add that target as a dependency
# for your main target to ensure these files are built. You can also retrieve
# various properties from this target, such as GENERATED_INCLUDES_DIR,
# BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR.
# custom_target_name: The generated files will be added as dependencies for a new custom
# target with this name. You should add that target as a dependency for your main target
# to ensure these files are built. You can also retrieve various properties from this
# target, such as GENERATED_INCLUDES_DIR, BINARY_SCHEMAS_DIR, and COPY_TEXT_SCHEMAS_DIR.
#
# additional_dependencies: A list of additional dependencies that you'd like
# all generated files to depend on. Pass in a blank string if you have none.
# additional_dependencies: A list of additional dependencies that you'd like all
# generated files to depend on. Pass in a blank string if you have none.
#
# generated_includes_dir: Where to generate the C++ header files for these
# schemas. The generated includes directory will automatically be added to
# CMake's include directories, and will be where generated header files are
# placed. This parameter is optional; pass in empty string if you don't want to
# generate include files for these schemas.
# generated_includes_dir: Where to generate the C++ header files for these schemas. The
# generated includes directory will automatically be added to CMake's include
# directories, and will be where generated header files are placed. This parameter is
# optional; pass in empty string if you don't want to generate include files for these
# schemas.
#
# binary_schemas_dir: If you specify an optional binary schema directory, binary
# schemas will be generated for these schemas as well, and placed into the given
# directory.
# binary_schemas_dir: If you specify an optional binary schema directory, binary schemas
# will be generated for these schemas as well, and placed into the given directory.
#
# copy_text_schemas_dir: If you want all text schemas (including schemas from
# all schema include directories) copied into a directory (for example, if you
# need them within your project to build JSON files), you can specify that
# folder here. All text schemas will be copied to that folder.
# copy_text_schemas_dir: If you want all text schemas (including schemas from all schema
# include directories) copied into a directory (for example, if you need them within
# your project to build JSON files), you can specify that folder here. All text schemas
# will be copied to that folder.
#
# IMPORTANT: Make sure you quote all list arguments you pass to this function!
# Otherwise CMake will only pass in the first element.
# Example: build_flatbuffers("${fb_files}" "${include_dirs}" target_name ...)
function(build_flatbuffers flatbuffers_schemas
schema_include_dirs
custom_target_name
additional_dependencies
generated_includes_dir
binary_schemas_dir
copy_text_schemas_dir)
# IMPORTANT: Make sure you quote all list arguments you pass to this function! Otherwise
# CMake will only pass in the first element. Example: build_flatbuffers("${fb_files}"
# "${include_dirs}" target_name ...)
function(
build_flatbuffers
flatbuffers_schemas
schema_include_dirs
custom_target_name
additional_dependencies
generated_includes_dir
binary_schemas_dir
copy_text_schemas_dir)

# Test if including from FindFlatBuffers
if(FLATBUFFERS_FLATC_EXECUTABLE)
@@ -65,10 +62,7 @@ function(build_flatbuffers flatbuffers_schemas
endif()
set(FLATC_SCHEMA_ARGS --gen-mutable)
if(FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS)
set(FLATC_SCHEMA_ARGS
${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS}
${FLATC_SCHEMA_ARGS}
)
set(FLATC_SCHEMA_ARGS ${FLATBUFFERS_FLATC_SCHEMA_EXTRA_ARGS} ${FLATC_SCHEMA_ARGS})
endif()

set(working_dir "${CMAKE_CURRENT_SOURCE_DIR}")
@@ -77,12 +71,12 @@ function(build_flatbuffers flatbuffers_schemas
# Generate the include files parameters.
set(include_params "")
set(all_generated_files "")
foreach (include_dir ${schema_include_dirs})
foreach(include_dir ${schema_include_dirs})
set(include_params -I ${include_dir} ${include_params})
if (NOT ${copy_text_schemas_dir} STREQUAL "")
if(NOT ${copy_text_schemas_dir} STREQUAL "")
# Copy text schemas from dependent folders.
file(GLOB_RECURSE dependent_schemas ${include_dir}/${schema_glob})
foreach (dependent_schema ${dependent_schemas})
foreach(dependent_schema ${dependent_schemas})
file(COPY ${dependent_schema} DESTINATION ${copy_text_schemas_dir})
endforeach()
endif()
@@ -91,62 +85,54 @@ function(build_flatbuffers flatbuffers_schemas
foreach(schema ${flatbuffers_schemas})
get_filename_component(filename ${schema} NAME_WE)
# For each schema, do the things we requested.
if (NOT ${generated_includes_dir} STREQUAL "")
if(NOT ${generated_includes_dir} STREQUAL "")
set(generated_include ${generated_includes_dir}/${filename}_generated.h)
add_custom_command(
OUTPUT ${generated_include}
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS}
-o ${generated_includes_dir}
${include_params}
-c ${schema}
COMMAND ${FLATC} ${FLATC_SCHEMA_ARGS} -o ${generated_includes_dir}
${include_params} -c ${schema}
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies}
WORKING_DIRECTORY "${working_dir}")
list(APPEND all_generated_files ${generated_include})
endif()

if (NOT ${binary_schemas_dir} STREQUAL "")
if(NOT ${binary_schemas_dir} STREQUAL "")
set(binary_schema ${binary_schemas_dir}/${filename}.bfbs)
add_custom_command(
OUTPUT ${binary_schema}
COMMAND ${FLATC} -b --schema
-o ${binary_schemas_dir}
${include_params}
${schema}
COMMAND ${FLATC} -b --schema -o ${binary_schemas_dir} ${include_params}
${schema}
DEPENDS ${FLATC_TARGET} ${schema} ${additional_dependencies}
WORKING_DIRECTORY "${working_dir}")
list(APPEND all_generated_files ${binary_schema})
endif()

if (NOT ${copy_text_schemas_dir} STREQUAL "")
if(NOT ${copy_text_schemas_dir} STREQUAL "")
file(COPY ${schema} DESTINATION ${copy_text_schemas_dir})
endif()
endforeach()

# Create a custom target that depends on all the generated files.
# This is the target that you can depend on to trigger all these
# to be built.
add_custom_target(${custom_target_name}
DEPENDS ${all_generated_files} ${additional_dependencies})
# Create a custom target that depends on all the generated files. This is the target
# that you can depend on to trigger all these to be built.
add_custom_target(${custom_target_name} DEPENDS ${all_generated_files}
${additional_dependencies})

# Register the include directory we are using.
if (NOT ${generated_includes_dir} STREQUAL "")
if(NOT ${generated_includes_dir} STREQUAL "")
include_directories(${generated_includes_dir})
set_property(TARGET ${custom_target_name}
PROPERTY GENERATED_INCLUDES_DIR
${generated_includes_dir})
set_property(TARGET ${custom_target_name} PROPERTY GENERATED_INCLUDES_DIR
${generated_includes_dir})
endif()

# Register the binary schemas dir we are using.
if (NOT ${binary_schemas_dir} STREQUAL "")
set_property(TARGET ${custom_target_name}
PROPERTY BINARY_SCHEMAS_DIR
${binary_schemas_dir})
if(NOT ${binary_schemas_dir} STREQUAL "")
set_property(TARGET ${custom_target_name} PROPERTY BINARY_SCHEMAS_DIR
${binary_schemas_dir})
endif()

# Register the text schema copy dir we are using.
if (NOT ${copy_text_schemas_dir} STREQUAL "")
set_property(TARGET ${custom_target_name}
PROPERTY COPY_TEXT_SCHEMAS_DIR
${copy_text_schemas_dir})
if(NOT ${copy_text_schemas_dir} STREQUAL "")
set_property(TARGET ${custom_target_name} PROPERTY COPY_TEXT_SCHEMAS_DIR
${copy_text_schemas_dir})
endif()
endfunction()
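
As a rough illustration of how the reformatted helper is meant to be invoked, the sketch below uses hypothetical schema paths and target names that are not part of this commit; note that every list argument is quoted, as the comment block above requires.

  # Hypothetical call to build_flatbuffers(); paths and target names are placeholders.
  set(fb_schemas "${CMAKE_CURRENT_SOURCE_DIR}/schema/model.fbs")
  set(fb_include_dirs "${CMAKE_CURRENT_SOURCE_DIR}/schema")
  build_flatbuffers(
    "${fb_schemas}"                            # flatbuffers_schemas
    "${fb_include_dirs}"                       # schema_include_dirs
    model_fbs                                  # custom_target_name
    ""                                         # additional_dependencies (none)
    "${CMAKE_CURRENT_BINARY_DIR}/fbs/include"  # generated_includes_dir
    "${CMAKE_CURRENT_BINARY_DIR}/fbs/bin"      # binary_schemas_dir
    "${CMAKE_CURRENT_BINARY_DIR}/fbs/text")    # copy_text_schemas_dir
  # my_main_target is a placeholder for whatever target consumes the generated headers.
  add_dependencies(my_main_target model_fbs)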

+27 -31  cmake/FetchMegBrainVersion.cmake

@@ -1,49 +1,45 @@
# Parses the version set in src/core/include/megbrain/version.h
# Exports the following variables:
# MGB_VER_MAJOR: Major version
# MGB_VER_MINOR: Minor version
# MGB_VER_PATCH: Patch version
# MGB_IS_DEV: Is development version
# MGB_VER_STRING: Version string
# Parses the version set in src/core/include/megbrain/version.h Exports the following
# variables: MGB_VER_MAJOR: Major version MGB_VER_MINOR: Minor version MGB_VER_PATCH:
# Patch version MGB_IS_DEV: Is development version MGB_VER_STRING: Version string
option(MGB_FORCE_DEV_VERSION "Force -dev tag in version stamp" OFF)

file (READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content)
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/src/core/include/megbrain/version.h" content)

string (REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content})
set (MGB_VER_MAJOR ${CMAKE_MATCH_1})
string(REGEX MATCH "MGB_MAJOR +([0-9]+)" _ ${content})
set(MGB_VER_MAJOR ${CMAKE_MATCH_1})

string (REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content})
set (MGB_VER_MINOR ${CMAKE_MATCH_1})
string(REGEX MATCH "MGB_MINOR +([0-9]+)" _ ${content})
set(MGB_VER_MINOR ${CMAKE_MATCH_1})

string (REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content})
set (MGB_VER_PATCH ${CMAKE_MATCH_1})
string(REGEX MATCH "MGB_PATCH *([0-9]+)" _ ${content})
set(MGB_VER_PATCH ${CMAKE_MATCH_1})

string (REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content})
set (MGE_VER_MAJOR ${CMAKE_MATCH_1})
string(REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content})
set(MGE_VER_MAJOR ${CMAKE_MATCH_1})

string (REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content})
set (MGE_VER_MINOR ${CMAKE_MATCH_1})
string(REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content})
set(MGE_VER_MINOR ${CMAKE_MATCH_1})

string (REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content})
set (MGE_VER_PATCH ${CMAKE_MATCH_1})
string(REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content})
set(MGE_VER_PATCH ${CMAKE_MATCH_1})

string (REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content})
set (MGE_EXTRA_NAME ${CMAKE_MATCH_1})
string(REGEX MATCH "MGE_EXTRA_NAME *\"(.*)\"" _ ${content})
set(MGE_EXTRA_NAME ${CMAKE_MATCH_1})

if (MGB_FORCE_DEV_VERSION)
set (MGB_IS_DEV 1)
if(MGB_FORCE_DEV_VERSION)
set(MGB_IS_DEV 1)
else()
string (REGEX MATCH "MGB_IS_DEV +([01])" _ ${content})
set (MGB_IS_DEV ${CMAKE_MATCH_1})
string(REGEX MATCH "MGB_IS_DEV +([01])" _ ${content})
set(MGB_IS_DEV ${CMAKE_MATCH_1})
endif()

if (DEFINED MGB_VER_MAJOR)
set (MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}")
if(DEFINED MGB_VER_MAJOR)
set(MGB_VER_STRING "${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}")
else()
set (MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}")
set(MGB_VER_STRING "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}")
endif(DEFINED MGB_VER_MAJOR)
if (MGB_IS_DEV)
set (MGB_VER_STRING "${MGB_VER_STRING}-dev")
if(MGB_IS_DEV)
set(MGB_VER_STRING "${MGB_VER_STRING}-dev")
endif()

message(STATUS "Building MegBrain ${MGB_VER_STRING}")
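
A minimal, self-contained sketch of the parsing pattern above, run against a hypothetical stand-in for the header (the real values live in src/core/include/megbrain/version.h):

  # The string below is a made-up example of the defines the regexes expect.
  set(content "#define MGE_MAJOR 1\n#define MGE_MINOR 8\n#define MGE_PATCH 0")
  string(REGEX MATCH "MGE_MAJOR +([0-9]+)" _ ${content})
  set(MGE_VER_MAJOR ${CMAKE_MATCH_1})
  string(REGEX MATCH "MGE_MINOR +([0-9]+)" _ ${content})
  set(MGE_VER_MINOR ${CMAKE_MATCH_1})
  string(REGEX MATCH "MGE_PATCH *([0-9]+)" _ ${content})
  set(MGE_VER_PATCH ${CMAKE_MATCH_1})
  # Prints "1.8.0" for the sample content above.
  message(STATUS "${MGE_VER_MAJOR}.${MGE_VER_MINOR}.${MGE_VER_PATCH}")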

+24 -15  cmake/Halide.cmake

@@ -2,31 +2,40 @@
include(ExternalProject)
find_package(LLVM 6.0 REQUIRED CONFIG)

STRING(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION})
string(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION})
list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR)
list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR)

set(HALIDE_DIR "${PROJECT_SOURCE_DIR}/third_party/Halide" CACHE STRING "halide directory")
set(HALIDE_DIR
"${PROJECT_SOURCE_DIR}/third_party/Halide"
CACHE STRING "halide directory")
set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide)
set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a)
ExternalProject_add(
halide
SOURCE_DIR ${HALIDE_DIR}
PREFIX ${HALIDE_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} -DWITH_APPS=OFF -DWITH_TESTS=OFF -DWITH_TUTORIALS=OFF -DHALIDE_SHARED_LIBRARY=OFF -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DTARGET_MIPS=OFF -DTARGET_POWERPC=OFF
BUILD_BYPRODUCTS ${HALIDE_LIB}
)
ExternalProject_Add(
halide
SOURCE_DIR ${HALIDE_DIR}
PREFIX ${HALIDE_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR}
-DWITH_APPS=OFF
-DWITH_TESTS=OFF
-DWITH_TUTORIALS=OFF
-DHALIDE_SHARED_LIBRARY=OFF
-DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DTARGET_MIPS=OFF
-DTARGET_POWERPC=OFF
BUILD_BYPRODUCTS ${HALIDE_LIB})

set(HALIDE_INC ${HALIDE_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${HALIDE_INC})
add_library(libhalide STATIC IMPORTED GLOBAL)
add_dependencies(libhalide halide)
set_target_properties(
libhalide PROPERTIES
IMPORTED_LOCATION ${HALIDE_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC}
)
set_target_properties(libhalide PROPERTIES IMPORTED_LOCATION ${HALIDE_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC})

set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU)
llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS})
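
A short sketch of how a consumer would use the imported target defined above; the executable name and source file are placeholders, not part of this commit:

  # Link the static Halide library together with the LLVM component libraries
  # resolved by llvm_map_components_to_libnames() above.
  add_executable(halide_demo demo.cpp)  # hypothetical target and source
  target_link_libraries(halide_demo PRIVATE libhalide ${HALIDE_LLVM_LIBS})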


+21 -15  cmake/MKL_DNN.cmake

@@ -1,25 +1,31 @@
if (MGE_USE_SYSTEM_LIB)
find_package(dnnl)
if (dnnl_FOUND)
message(STATUS "Using system provided MKL-DNN.")
set (MGE_USE_SYSTEM_MKLDNN ON)
return()
endif()
if(MGE_USE_SYSTEM_LIB)
find_package(dnnl)
if(dnnl_FOUND)
message(STATUS "Using system provided MKL-DNN.")
set(MGE_USE_SYSTEM_MKLDNN ON)
return()
endif()
endif()
option(DNNL_BUILD_TESTS "" OFF)
option(DNNL_BUILD_EXAMPLES "" OFF)
# we do not want to use OMP now, so config to CPU mode
# if set to OMP, some dnnl algo will be more fast
set(DNNL_CPU_RUNTIME "SEQ" CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ")
# we do not want to use OMP now, so config to CPU mode if set to OMP, some dnnl algo
# will be more fast
set(DNNL_CPU_RUNTIME
"SEQ"
CACHE STRING "config dnnl to DNNL_RUNTIME_SEQ")
if(MGE_BLAS STREQUAL "MKL")
option(_DNNL_USE_MKL "" ON)
set(MKLROOT ${MKL_ROOT_DIR} CACHE STRING "MKL ROOT FOR DNNL")
set(MKLLIB libmkl)
option(_DNNL_USE_MKL "" ON)
set(MKLROOT
${MKL_ROOT_DIR}
CACHE STRING "MKL ROOT FOR DNNL")
set(MKLLIB libmkl)
else()
option(_DNNL_USE_MKL "" OFF)
option(_DNNL_USE_MKL "" OFF)
endif()

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-parameter -Wno-extra")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-extra")
set(DNNL_LIBRARY_TYPE STATIC CACHE STRING "config dnnl to STATIC")
set(DNNL_LIBRARY_TYPE
STATIC
CACHE STRING "config dnnl to STATIC")
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn)
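
For context, a hedged sketch of consuming the result: assuming the vendored intel-mkl-dnn subdirectory exposes its usual dnnl library target (the target name is not shown in this diff), a consumer would simply link against it.

  add_executable(dnnl_probe probe.cpp)  # hypothetical target and source
  target_link_libraries(dnnl_probe PRIVATE dnnl)  # "dnnl" assumed from oneDNN's convention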

+26 -21  cmake/Modules/FindNumPy.cmake

@@ -1,30 +1,28 @@
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
# * Find the NumPy libraries This module finds if NumPy is installed, and sets the
# following variables indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
# NUMPY_FOUND - was NumPy found NUMPY_VERSION - the version of
# NumPy found as a string NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy NUMPY_VERSION_PATCH -
# the patch version number of NumPy NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is
# 10601 NUMPY_INCLUDE_DIR - path to the NumPy include files

unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)

if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
execute_process(
COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)

if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)

@@ -33,13 +31,18 @@ if(PYTHONINTERP_FOUND)
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
math(
EXPR
NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}"
)
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(
STATUS
"Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
@@ -47,8 +50,10 @@ else()
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
find_package_handle_standard_args(
NumPy
REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)

if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")


+30 -28  cmake/OpenBLAS.cmake

@@ -1,48 +1,50 @@
if (MGE_USE_SYSTEM_LIB)
find_package(OpenBLAS)
set (MGE_USE_SYSTEM_OPENBLAS ON)

message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}")
add_library(libopenblas IMPORTED GLOBAL)
set_target_properties(
libopenblas PROPERTIES
IMPORTED_LOCATION ${OpenBLAS_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS}
)
return()
if(MGE_USE_SYSTEM_LIB)
find_package(OpenBLAS)
set(MGE_USE_SYSTEM_OPENBLAS ON)

message(STATUS "Using system provided OpenBLAS ${OpenBLAS_VERSION}")
add_library(libopenblas IMPORTED GLOBAL)
set_target_properties(
libopenblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${OpenBLAS_INCLUDE_DIRS})
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)

set(OPENBLAS_DIR "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" CACHE STRING "OpenBLAS directory")
set(OPENBLAS_DIR
"${PROJECT_SOURCE_DIR}/third_party/OpenBLAS"
CACHE STRING "OpenBLAS directory")
set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS)

set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include)
set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)

if(${CMAKE_GENERATOR} STREQUAL "Ninja")
set(MAKE_COMMAND make)
set(MAKE_COMMAND make)
else()
set(MAKE_COMMAND "$(MAKE)")
set(MAKE_COMMAND "$(MAKE)")
endif()

ExternalProject_add(
openblas
SOURCE_DIR ${OPENBLAS_DIR}
PREFIX ${OPENBLAS_BUILD_DIR}
CMAKE_GENERATOR "Unix Makefiles"
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_COMMAND ${MAKE_COMMAND}
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE}
)
ExternalProject_Add(
openblas
SOURCE_DIR ${OPENBLAS_DIR}
PREFIX ${OPENBLAS_BUILD_DIR}
CMAKE_GENERATOR "Unix Makefiles"
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR}
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_COMMAND ${MAKE_COMMAND}
BUILD_BYPRODUCTS ${OPENBLAS_LIB} ${OPENBLAS_PROTOC_EXECUTABLE})

file(MAKE_DIRECTORY ${OPENBLAS_INC})

add_library(libopenblas STATIC IMPORTED GLOBAL)
add_dependencies(libopenblas openblas)
set_target_properties(
libopenblas PROPERTIES
IMPORTED_LOCATION ${OPENBLAS_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include
)
libopenblas PROPERTIES IMPORTED_LOCATION ${OPENBLAS_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include)
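
Once the ExternalProject above has built OpenBLAS into the build tree, the imported libopenblas target carries both the static archive and its include directory, so a consumer (hypothetical target below) only needs to link it:

  add_executable(blas_smoke_test smoke_test.c)  # placeholder target and source
  target_link_libraries(blas_smoke_test PRIVATE libopenblas)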

+18 -18  cmake/aclrt.cmake

@@ -1,31 +1,31 @@
find_library(ACLRT_LIBRARY
NAMES libascendcl.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES stub
DOC "ACL library." )
find_library(
ACLRT_LIBRARY
NAMES libascendcl.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{ACLRT_HOME}/lib64/stub" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES stub
DOC "ACL library.")

if(ACLRT_LIBRARY STREQUAL "ACLRT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find ACLRT Library")
message(FATAL_ERROR "Can not find ACLRT Library")
endif()

get_filename_component(__found_aclrt_root "${ACLRT_LIBRARY}/../../../" REALPATH)
find_path(ACLRT_INCLUDE_DIR
NAMES acl/acl.h
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root}
PATH_SUFFIXES include
DOC "Path to ACLRT include directory." )
find_path(
ACLRT_INCLUDE_DIR
NAMES acl/acl.h
HINTS "$ENV{ACLRT_HOME}/include" ${__found_aclrt_root}
PATH_SUFFIXES include
DOC "Path to ACLRT include directory.")

if(ACLRT_INCLUDE_DIR STREQUAL "ACLRT_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find ACLRT Library")
message(FATAL_ERROR "Can not find ACLRT Library")
endif()

add_library(libascendcl SHARED IMPORTED)

set_target_properties(libascendcl PROPERTIES
IMPORTED_LOCATION ${ACLRT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR}
)
set_target_properties(
libascendcl PROPERTIES IMPORTED_LOCATION ${ACLRT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${ACLRT_INCLUDE_DIR})

message(STATUS "Found ACLRT: ${__found_aclrt_root}")
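
A hedged sketch of consuming the imported stub library set up above; the target and source are placeholders, and at runtime the real libascendcl.so from the Ascend toolkit must be on the loader path:

  add_library(atlas_probe SHARED probe.cpp)  # hypothetical target and source
  target_link_libraries(atlas_probe PRIVATE libascendcl)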


+44 -31  cmake/cndev.cmake

@@ -1,44 +1,57 @@
find_library(CNDEV_LIBRARY
NAMES libcndev.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNDEV library." )
find_library(
CNDEV_LIBRARY
NAMES libcndev.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNDEV library.")

if(CNDEV_LIBRARY STREQUAL "CNDEV_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNDEV Library")
message(FATAL_ERROR "Can not find CNDEV Library")
endif()

get_filename_component(__found_cndev_root ${CNDEV_LIBRARY}/../.. REALPATH)
find_path(CNDEV_INCLUDE_DIR
NAMES cndev.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root}
PATH_SUFFIXES include
DOC "Path to CNDEV include directory." )
find_path(
CNDEV_INCLUDE_DIR
NAMES cndev.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cndev_root}
PATH_SUFFIXES include
DOC "Path to CNDEV include directory.")

if(CNDEV_INCLUDE_DIR STREQUAL "CNDEV_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNDEV Library")
message(FATAL_ERROR "Can not find CNDEV Library")
endif()

file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1 REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2 REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3 REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4 REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5 REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$")

string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1 "${CNDEV_1}")
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2 "${CNDEV_2}")
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3 "${CNDEV_3}")
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4 "${CNDEV_4}")
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5 "${CNDEV_5}")
set(CNDEV_VERSION_STRING "${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_1
REGEX "^#define CNDEV_VERSION_1 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_2
REGEX "^#define CNDEV_VERSION_2 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_3
REGEX "^#define CNDEV_VERSION_3 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_4
REGEX "^#define CNDEV_VERSION_4 [0-9]+.*$")
file(STRINGS "${CNDEV_INCLUDE_DIR}/cndev.h" CNDEV_5
REGEX "^#define CNDEV_VERSION_5 [0-9]+.*$")

string(REGEX REPLACE "^#define CNDEV_VERSION_1 ([0-9]+).*$" "\\1" CNDEV_VERSION_1
"${CNDEV_1}")
string(REGEX REPLACE "^#define CNDEV_VERSION_2 ([0-9]+).*$" "\\1" CNDEV_VERSION_2
"${CNDEV_2}")
string(REGEX REPLACE "^#define CNDEV_VERSION_3 ([0-9]+).*$" "\\1" CNDEV_VERSION_3
"${CNDEV_3}")
string(REGEX REPLACE "^#define CNDEV_VERSION_4 ([0-9]+).*$" "\\1" CNDEV_VERSION_4
"${CNDEV_4}")
string(REGEX REPLACE "^#define CNDEV_VERSION_5 ([0-9]+).*$" "\\1" CNDEV_VERSION_5
"${CNDEV_5}")
set(CNDEV_VERSION_STRING
"${CNDEV_VERSION_1}.${CNDEV_VERSION_2}.${CNDEV_VERSION_3}.${CNDEV_VERSION_4}.${CNDEV_VERSION_5}"
)

add_library(libcndev SHARED IMPORTED)

set_target_properties(libcndev PROPERTIES
IMPORTED_LOCATION ${CNDEV_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNDEV_INCLUDE_DIR}
)

message(STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})")
set_target_properties(
libcndev PROPERTIES IMPORTED_LOCATION ${CNDEV_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${CNDEV_INCLUDE_DIR})

message(
STATUS "Found CNDEV: ${__found_cndev_root} (found version: ${CNDEV_VERSION_STRING})")

+36 -27  cmake/cnlight.cmake

@@ -1,40 +1,49 @@
find_library(CNLIGHT_LIBRARY
NAMES libcnlight.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNLIGHT library." )
find_library(
CNLIGHT_LIBRARY
NAMES libcnlight.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNLIGHT library.")

if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNLIGHT Library")
message(FATAL_ERROR "Can not find CNLIGHT Library")
endif()

get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../.." REALPATH)
find_path(CNLIGHT_INCLUDE_DIR
NAMES cnlight.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root}
PATH_SUFFIXES include
DOC "Path to CNLIGHT include directory." )
find_path(
CNLIGHT_INCLUDE_DIR
NAMES cnlight.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root}
PATH_SUFFIXES include
DOC "Path to CNLIGHT include directory.")

if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNLIGHT Library")
message(FATAL_ERROR "Can not find CNLIGHT Library")
endif()

file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}")
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}")
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}")
set(CNLIGHT_VERSION_STRING "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}")
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR
REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR
REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH
REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1"
CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}")
string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1"
CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}")
string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1"
CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}")
set(CNLIGHT_VERSION_STRING
"${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}")

add_library(libcnlight SHARED IMPORTED)

set_target_properties(libcnlight PROPERTIES
IMPORTED_LOCATION ${CNLIGHT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR}
)

message(STATUS "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})")
set_target_properties(
libcnlight PROPERTIES IMPORTED_LOCATION ${CNLIGHT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR})

message(
STATUS
"Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})")

+35 -27  cmake/cnml.cmake

@@ -1,40 +1,48 @@
find_library(CNML_LIBRARY
NAMES libcnml.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNML library." )
find_library(
CNML_LIBRARY
NAMES libcnml.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNML library.")

if(CNML_LIBRARY STREQUAL "CNML_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNML Library")
message(FATAL_ERROR "Can not find CNML Library")
endif()

get_filename_component(__found_cnml_root "${CNML_LIBRARY}/../.." REALPATH)
find_path(CNML_INCLUDE_DIR
NAMES cnml.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root}
PATH_SUFFIXES include
DOC "Path to CNML include directory." )
find_path(
CNML_INCLUDE_DIR
NAMES cnml.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnml_root}
PATH_SUFFIXES include
DOC "Path to CNML include directory.")

if(CNML_INCLUDE_DIR STREQUAL "CNML_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNML Library")
message(FATAL_ERROR "Can not find CNML Library")
endif()

file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR "${CNML_MAJOR}")
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR "${CNML_MINOR}")
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH "${CNML_PATCH}")
set(CNML_VERSION_STRING "${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}")
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MAJOR
REGEX "^#define CNML_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_MINOR
REGEX "^#define CNML_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNML_INCLUDE_DIR}/cnml.h" CNML_PATCH
REGEX "^#define CNML_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNML_MAJOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MAJOR
"${CNML_MAJOR}")
string(REGEX REPLACE "^#define CNML_MINOR_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_MINOR
"${CNML_MINOR}")
string(REGEX REPLACE "^#define CNML_PATCH_VERSION ([0-9]+).*$" "\\1" CNML_VERSION_PATCH
"${CNML_PATCH}")
set(CNML_VERSION_STRING
"${CNML_VERSION_MAJOR}.${CNML_VERSION_MINOR}.${CNML_VERSION_PATCH}")

add_library(libcnml SHARED IMPORTED)

set_target_properties(libcnml PROPERTIES
IMPORTED_LOCATION ${CNML_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNML_INCLUDE_DIR}
)

message(STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})")
set_target_properties(
libcnml PROPERTIES IMPORTED_LOCATION ${CNML_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${CNML_INCLUDE_DIR})

message(
STATUS "Found CNML: ${__found_cnml_root} (found version: ${CNML_VERSION_STRING})")

+73 -53  cmake/cnnl.cmake

@@ -1,80 +1,100 @@
find_library(CNNL_LIBRARY
NAMES libcnnl.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNNL library." )
find_library(
CNNL_LIBRARY
NAMES libcnnl.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNNL library.")

if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNNL Library")
message(FATAL_ERROR "Can not find CNNL Library")
endif()

get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../.." REALPATH)
find_path(CNNL_INCLUDE_DIR
NAMES cnnl.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root}
PATH_SUFFIXES include
DOC "Path to CNNL include directory." )
find_path(
CNNL_INCLUDE_DIR
NAMES cnnl.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root}
PATH_SUFFIXES include
DOC "Path to CNNL include directory.")

if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNNL Library")
message(FATAL_ERROR "Can not find CNNL Library")
endif()

file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR REGEX "^#define CNNL_MAJOR [0-9]+.*$")
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR REGEX "^#define CNNL_MINOR [0-9]+.*$")
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$")

string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR "${CNNL_MAJOR}")
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR "${CNNL_MINOR}")
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH "${CNNL_PATCH}")
set(CNNL_VERSION_STRING "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}")
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR
REGEX "^#define CNNL_MAJOR [0-9]+.*$")
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR
REGEX "^#define CNNL_MINOR [0-9]+.*$")
file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH
REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$")

string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR
"${CNNL_MAJOR}")
string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR
"${CNNL_MINOR}")
string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH
"${CNNL_PATCH}")
set(CNNL_VERSION_STRING
"${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}")

add_library(libcnnl SHARED IMPORTED)

set_target_properties(libcnnl PROPERTIES
IMPORTED_LOCATION ${CNNL_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_INCLUDE_DIR}
)
set_target_properties(
libcnnl PROPERTIES IMPORTED_LOCATION ${CNNL_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${CNNL_INCLUDE_DIR})

message(STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})")
message(
STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})")

find_library(CNNL_EXTRA_LIBRARY
NAMES libcnnl_extra.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNNL_EXTRA library." )
find_library(
CNNL_EXTRA_LIBRARY
NAMES libcnnl_extra.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNNL_EXTRA library.")

if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNNL_EXTRA Library")
message(FATAL_ERROR "Can not find CNNL_EXTRA Library")
endif()

get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../.." REALPATH)
find_path(CNNL_EXTRA_INCLUDE_DIR
NAMES cnnl_extra.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root}
PATH_SUFFIXES include
DOC "Path to CNNL_EXTRA include directory." )
find_path(
CNNL_EXTRA_INCLUDE_DIR
NAMES cnnl_extra.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root}
PATH_SUFFIXES include
DOC "Path to CNNL_EXTRA include directory.")

if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNNL_EXTRA Library")
message(FATAL_ERROR "Can not find CNNL_EXTRA Library")
endif()

file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$")
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$")
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$")

string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}")
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}")
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}")
set(CNNL_EXTRA_VERSION_STRING "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}")
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR
REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$")
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR
REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$")
file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH
REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$")

string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1"
CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}")
string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1"
CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}")
string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1"
CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}")
set(CNNL_EXTRA_VERSION_STRING
"${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}"
)

add_library(libcnnl_extra SHARED IMPORTED)

set_target_properties(libcnnl_extra PROPERTIES
IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR}
)

message(STATUS "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})")
set_target_properties(
libcnnl_extra PROPERTIES IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR})

message(
STATUS
"Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})"
)

+35 -27  cmake/cnrt.cmake

@@ -1,40 +1,48 @@
find_library(CNRT_LIBRARY
NAMES libcnrt.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNRT library." )
find_library(
CNRT_LIBRARY
NAMES libcnrt.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CNRT library.")

if(CNRT_LIBRARY STREQUAL "CNRT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CNRT Library")
message(FATAL_ERROR "Can not find CNRT Library")
endif()

get_filename_component(__found_cnrt_root ${CNRT_LIBRARY}/../../ REALPATH)
find_path(CNRT_INCLUDE_DIR
NAMES cnrt.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root}
PATH_SUFFIXES include
DOC "Path to CNRT include directory." )
find_path(
CNRT_INCLUDE_DIR
NAMES cnrt.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnrt_root}
PATH_SUFFIXES include
DOC "Path to CNRT include directory.")

if(CNRT_INCLUDE_DIR STREQUAL "CNRT_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CNRT Library")
message(FATAL_ERROR "Can not find CNRT Library")
endif()

file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR "${CNRT_MAJOR}")
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR "${CNRT_MINOR}")
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH "${CNRT_PATCH}")
set(CNRT_VERSION_STRING "${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}")
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MAJOR
REGEX "^#define CNRT_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_MINOR
REGEX "^#define CNRT_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${CNRT_INCLUDE_DIR}/cnrt.h" CNRT_PATCH
REGEX "^#define CNRT_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define CNRT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MAJOR
"${CNRT_MAJOR}")
string(REGEX REPLACE "^#define CNRT_MINOR_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_MINOR
"${CNRT_MINOR}")
string(REGEX REPLACE "^#define CNRT_PATCH_VERSION ([0-9]+).*$" "\\1" CNRT_VERSION_PATCH
"${CNRT_PATCH}")
set(CNRT_VERSION_STRING
"${CNRT_VERSION_MAJOR}.${CNRT_VERSION_MINOR}.${CNRT_VERSION_PATCH}")

add_library(libcnrt SHARED IMPORTED)

set_target_properties(libcnrt PROPERTIES
IMPORTED_LOCATION ${CNRT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CNRT_INCLUDE_DIR}
)

message(STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})")
set_target_properties(
libcnrt PROPERTIES IMPORTED_LOCATION ${CNRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${CNRT_INCLUDE_DIR})

message(
STATUS "Found CNRT: ${__found_cnrt_root} (found version: ${CNRT_VERSION_STRING})")

+5 -2  cmake/cpp_redis.cmake

@@ -1,2 +1,5 @@
file(GLOB_RECURSE CPP_REDIS_SRCS ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp ${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp)
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes ${PROJECT_SOURCE_DIR}/third_party/tacopie/includes)
file(GLOB_RECURSE CPP_REDIS_SRCS
${PROJECT_SOURCE_DIR}/third_party/cpp_redis/sources/*.cpp
${PROJECT_SOURCE_DIR}/third_party/tacopie/sources/*.cpp)
set(CPP_REDIS_INCLUDES ${PROJECT_SOURCE_DIR}/third_party/cpp_redis/includes
${PROJECT_SOURCE_DIR}/third_party/tacopie/includes)
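
A brief sketch of how the globbed sources and include directories collected above could be consumed; the bundle target name is hypothetical:

  add_library(cpp_redis_bundle STATIC ${CPP_REDIS_SRCS})
  target_include_directories(cpp_redis_bundle PUBLIC ${CPP_REDIS_INCLUDES})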

+17 -17  cmake/cpuinfo.cmake

@@ -1,20 +1,20 @@
if (MGE_USE_SYSTEM_LIB)
find_package(Cpuinfo)
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}")
add_library(libcpuinfo IMPORTED GLOBAL)
set_target_properties(
libcpuinfo PROPERTIES
IMPORTED_LOCATION ${cpuinfo_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS}
)
return()
if(MGE_USE_SYSTEM_LIB)
find_package(Cpuinfo)
message(STATUS "Using system provided cpuinfo ${cpuinfo_VERSION}")
add_library(libcpuinfo IMPORTED GLOBAL)
set_target_properties(
libcpuinfo PROPERTIES IMPORTED_LOCATION ${cpuinfo_LIBRARIES}
INTERFACE_INCLUDE_DIRECTORIES ${cpuinfo_INCLUDE_DIRS})
return()
endif()

SET(CPUINFO_LIBRARY_TYPE "static" CACHE STRING "Type of cpuinfo library (shared, static, or default) to build")
OPTION(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF)
OPTION(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF)
OPTION(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF)
OPTION(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF)
set(CPUINFO_LIBRARY_TYPE
"static"
CACHE STRING "Type of cpuinfo library (shared, static, or default) to build")
option(CPUINFO_BUILD_TOOLS "Build command-line tools" OFF)
option(CPUINFO_BUILD_UNIT_TESTS "Build cpuinfo unit tests" OFF)
option(CPUINFO_BUILD_MOCK_TESTS "Build cpuinfo mock tests" OFF)
option(CPUINFO_BUILD_BENCHMARKS "Build cpuinfo micro-benchmarks" OFF)
include_directories("${PROJECT_SOURCE_DIR}/third_party/cpuinfo/include")
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo ${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/cpuinfo
${CMAKE_CURRENT_BINARY_DIR}/cpuinfo EXCLUDE_FROM_ALL)
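
A hedged usage sketch, assuming the subdirectory added above exposes cpuinfo's usual cpuinfo library target (the target name is not shown in this diff):

  add_executable(cpu_probe probe.c)  # placeholder target and source
  target_link_libraries(cpu_probe PRIVATE cpuinfo)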

+53 -43  cmake/cudnn.cmake

@@ -1,73 +1,83 @@
find_package(PkgConfig)
if(${PkgConfig_FOUND})
pkg_check_modules(PC_CUDNN QUIET CUDNN)
pkg_check_modules(PC_CUDNN QUIET CUDNN)
endif()

if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "")
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR})
if("${CUDNN_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{CUDNN_ROOT_DIR}" STREQUAL "")
set(CUDNN_ROOT_DIR $ENV{CUDNN_ROOT_DIR})
endif()

if(MGE_CUDA_USE_STATIC AND NOT MGE_WITH_CUDNN_SHARED)
find_library(CUDNN_LIBRARY
NAMES libcudnn_static.a cudnn.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library." )
find_library(
CUDNN_LIBRARY
NAMES libcudnn_static.a cudnn.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS}
${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library.")
else()
find_library(CUDNN_LIBRARY
NAMES libcudnn.so libcudnn.dylib cudnn64.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library." )
find_library(
CUDNN_LIBRARY
NAMES libcudnn.so libcudnn.dylib cudnn64.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS}
${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library.")
endif()

if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env")
message(
FATAL_ERROR
"Can not find CuDNN Library, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env"
)
endif()

get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH)
find_path(CUDNN_INCLUDE_DIR
NAMES cudnn.h
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
PATH_SUFFIXES include
DOC "Path to CUDNN include directory." )
find_path(
CUDNN_INCLUDE_DIR
NAMES cudnn.h
HINTS $ENV{PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE}
${__found_cudnn_root}
PATH_SUFFIXES include
DOC "Path to CUDNN include directory.")

if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env")
message(
FATAL_ERROR
"Can not find CuDNN INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init CUDNN env"
)
endif()

if(EXISTS ${CUDNN_INCLUDE_DIR}/cudnn_version.h)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
else()
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
endif()

string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" CUDNN_MAJOR_VERSION
"${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" CUDNN_MAJOR_VERSION
"${CUDNN_MAJOR_VERSION}")
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" CUDNN_MINOR_VERSION
"${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" CUDNN_MINOR_VERSION
"${CUDNN_MINOR_VERSION}")
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" CUDNN_PATCH_VERSION
"${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" CUDNN_PATCH_VERSION
"${CUDNN_PATCH_VERSION}")
set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}.${CUDNN_PATCH_VERSION})



if(MGE_CUDA_USE_STATIC)
add_library(libcudnn STATIC IMPORTED)
add_library(libcudnn STATIC IMPORTED)
else()
add_library(libcudnn SHARED IMPORTED)
add_library(libcudnn SHARED IMPORTED)
endif()

set_target_properties(libcudnn PROPERTIES
IMPORTED_LOCATION ${CUDNN_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR})
set_target_properties(
libcudnn PROPERTIES IMPORTED_LOCATION ${CUDNN_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${CUDNN_INCLUDE_DIR})

message(STATUS "Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})")

+ 41
- 21
cmake/flatbuffers.cmake

@@ -1,27 +1,47 @@
if (MGE_USE_SYSTEM_LIB)
find_package(Flatbuffers REQUIRED)
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}")
include(cmake/BuildFlatBuffers.cmake)
return()
if(MGE_USE_SYSTEM_LIB)
find_package(Flatbuffers REQUIRED)
message(STATUS "Using system provided Flatbuffers ${Flatbuffers_VERSION}")
include(cmake/BuildFlatBuffers.cmake)
return()
endif()
if(MSVC OR WIN32)
message(DEBUG "add flags flatc for clang-cl build")
set(FLATC_FLAGS "")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal")
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0")
set(FLATC_FLAGS "${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override")
endif()
message(DEBUG "add flags flatc for clang-cl build")
set(FLATC_FLAGS "")
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=unknown-argument -Wno-error=c++98-compat -Wno-error=reserved-id-macro"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=sign-conversion -Wno-error=exceptions -Wno-error=argument-outside-range"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=delete-non-virtual-dtor -Wno-error=ignored-attributes -Wno-error=format"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=sign-compare -Wno-error=unused-private-field -Wno-error=braced-scalar-init"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=return-type-c-linkage -Wno-error=invalid-noreturn -Wno-error=c++98-compat-pedantic"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=extra-semi-stmt -Wno-error=missing-prototypes -Wno-error=documentation-unknown-command"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=missing-variable-declarations -Wno-error=nonportable-system-include-path"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=exit-time-destructors -Wno-error=unused-macros -Wno-error=global-constructors"
)
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=switch-enum -Wno-error=missing-noreturn -Wno-error=float-equal"
)
if(${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.0.0")
set(FLATC_FLAGS
"${FLATC_FLAGS} -Wno-error=suggest-override -Wno-error=suggest-destructor-override"
)
endif()

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FLATC_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLATC_FLAGS}")
endif()

option(FLATBUFFERS_BUILD_TESTS "" OFF)


+ 2
- 1
cmake/gflags.cmake

@@ -1 +1,2 @@
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags ${CMAKE_CURRENT_BINARY_DIR}/gflags)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gflags
${CMAKE_CURRENT_BINARY_DIR}/gflags)

+ 2
- 2
cmake/gtest.cmake

@@ -1,2 +1,2 @@
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest
${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL)

+ 109
- 62
cmake/llvm-project.cmake

@@ -1,88 +1,136 @@
# - Find the llvm/mlir libraries
# This module finds if llvm/mlir is installed, or build llvm/mlir from source.
# This module sets the following variables.
# * Find the llvm/mlir libraries This module finds if llvm/mlir is installed, or build
# llvm/mlir from source. This module sets the following variables.
#
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files
# MLIR_LLVM_LIBS - path to the LLVM/MLIR libraries
# MLIR_LLVM_INCLUDE_DIR - path to the LLVM/MLIR include files MLIR_LLVM_LIBS - path
# to the LLVM/MLIR libraries
#
# This module define the following functions.
#
# external_tablegen_library - created interface library which depends on tablegen outputs
# external_tablegen_library - created interface library which depends on tablegen
# outputs

include(CMakeParseArguments)

function(external_tablegen_library)
cmake_parse_arguments(
_RULE
"TESTONLY"
"NAME;TBLGEN"
"SRCS;INCLUDES;OUTS"
${ARGN}
)
cmake_parse_arguments(_RULE "TESTONLY" "NAME;TBLGEN" "SRCS;INCLUDES;OUTS" ${ARGN})

if(_RULE_TESTONLY AND NOT MGE_WITH_TEST)
return()
endif()
if(_RULE_TESTONLY AND NOT MGE_WITH_TEST)
return()
endif()

set(_NAME ${_RULE_NAME})
set(_NAME ${_RULE_NAME})

set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS})
set(_INCLUDE_DIRS ${_RULE_INCLUDES})
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I")
set(_OUTPUTS)
while(_RULE_OUTS)
list(GET _RULE_OUTS 0 _COMMAND)
list(REMOVE_AT _RULE_OUTS 0)
list(GET _RULE_OUTS 0 _FILE)
list(REMOVE_AT _RULE_OUTS 0)
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS})
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE})
endwhile()
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS})
set(LLVM_TARGET_DEFINITIONS ${_RULE_SRCS})
set(_INCLUDE_DIRS ${_RULE_INCLUDES})
list(TRANSFORM _INCLUDE_DIRS PREPEND "-I")
set(_OUTPUTS)
while(_RULE_OUTS)
list(GET _RULE_OUTS 0 _COMMAND)
list(REMOVE_AT _RULE_OUTS 0)
list(GET _RULE_OUTS 0 _FILE)
list(REMOVE_AT _RULE_OUTS 0)
tablegen(${_RULE_TBLGEN} ${_FILE} ${_COMMAND} ${_INCLUDE_DIRS})
list(APPEND _OUTPUTS ${CMAKE_CURRENT_BINARY_DIR}/${_FILE})
endwhile()
add_custom_target(${_NAME}_target DEPENDS ${_OUTPUTS})

add_library(${_NAME} INTERFACE)
add_dependencies(${_NAME} ${_NAME}_target)
add_library(${_NAME} INTERFACE)
add_dependencies(${_NAME} ${_NAME}_target)

target_include_directories(${_NAME} INTERFACE
"$<BUILD_INTERFACE:${_RULE_INCLUDES}>")
target_include_directories(${_NAME} INTERFACE "$<BUILD_INTERFACE:${_RULE_INCLUDES}>")

install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS})
install(TARGETS ${_NAME} EXPORT ${MGE_EXPORT_TARGETS})
endfunction()

set(LLVM_LIBS LLVMCore LLVMSupport LLVMX86CodeGen LLVMOrcJIT LLVMNVPTXCodeGen LLVMNVPTXDesc LLVMNVPTXInfo)
set(MLIR_CORE_LIBS MLIRAnalysis MLIRExecutionEngine MLIRIR MLIRParser MLIRPass MLIRSideEffectInterfaces MLIRTransforms)
set(MLIR_DIALECT_LIBS MLIRAsync MLIRAVX512 MLIRGPU MLIRLLVMAVX512 MLIRNVVMIR MLIROpenACC MLIRPDL MLIRPDLInterp MLIRQuant MLIRROCDLIR MLIRSDBM MLIRShape MLIRSPIRV MLIRStandardOpsTransforms MLIRTosa)
set(MLIR_CONVERSION_LIBS MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms MLIRGPUToNVVMTransforms MLIRSCFToStandard)
set(LLVM_LIBS
LLVMCore
LLVMSupport
LLVMX86CodeGen
LLVMOrcJIT
LLVMNVPTXCodeGen
LLVMNVPTXDesc
LLVMNVPTXInfo)
set(MLIR_CORE_LIBS
MLIRAnalysis
MLIRExecutionEngine
MLIRIR
MLIRParser
MLIRPass
MLIRSideEffectInterfaces
MLIRTransforms)
set(MLIR_DIALECT_LIBS
MLIRAsync
MLIRAVX512
MLIRGPU
MLIRLLVMAVX512
MLIRNVVMIR
MLIROpenACC
MLIRPDL
MLIRPDLInterp
MLIRQuant
MLIRROCDLIR
MLIRSDBM
MLIRShape
MLIRSPIRV
MLIRStandardOpsTransforms
MLIRTosa)
set(MLIR_CONVERSION_LIBS
MLIRAffineToStandard MLIRAVX512ToLLVM MLIRGPUToGPURuntimeTransforms
MLIRGPUToNVVMTransforms MLIRSCFToStandard)
set(MLIR_TRANSLATION_LIBS MLIRTargetLLVMIR MLIRTargetNVVMIR)
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS} ${MLIR_TRANSLATION_LIBS})
set(MLIR_LIBS ${MLIR_CORE_LIBS} ${MLIR_DIALECT_LIBS} ${MLIR_CONVERSION_LIBS}
${MLIR_TRANSLATION_LIBS})
set(MLIR_LLVM_LIBS ${LLVM_LIBS} ${MLIR_LIBS})

function(add_mge_mlir_src_dep llvm_monorepo_path)
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}")
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$")
set(CMAKE_BUILD_TYPE "Debug")
endif()
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
set(_CMAKE_BUILD_TYPE "${CMAKE_BUILD_TYPE}")
string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
if(NOT uppercase_CMAKE_BUILD_TYPE MATCHES
"^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$")
set(CMAKE_BUILD_TYPE "Debug")
endif()
set(_CMAKE_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS
OFF
CACHE BOOL "" FORCE)

add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL)
add_subdirectory("${llvm_monorepo_path}/llvm" ${LLVM_BUILD_DIR} EXCLUDE_FROM_ALL)

# Reset CMAKE_BUILD_TYPE to its previous setting
set(CMAKE_BUILD_TYPE "${_CMAKE_BUILD_TYPE}" CACHE STRING "Build type" FORCE)
# Reset BUILD_SHARED_LIBS to its previous setting
set(BUILD_SHARED_LIBS ${_CMAKE_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libraries" FORCE)
# Reset CMAKE_BUILD_TYPE to its previous setting
set(CMAKE_BUILD_TYPE
"${_CMAKE_BUILD_TYPE}"
CACHE STRING "Build type" FORCE)
# Reset BUILD_SHARED_LIBS to its previous setting
set(BUILD_SHARED_LIBS
${_CMAKE_BUILD_SHARED_LIBS}
CACHE BOOL "Build shared libraries" FORCE)
endfunction()

# llvm build options
set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_TESTS OFF CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "" FORCE)
set(LLVM_ENABLE_BINDINGS OFF CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "" FORCE)
set(LLVM_ENABLE_RTTI ${MGE_ENABLE_RTTI} CACHE BOOL "" FORCE)
set(LLVM_TARGETS_TO_BUILD "X86;NVPTX;AArch64;ARM" CACHE STRING "" FORCE)
set(LLVM_ENABLE_PROJECTS "mlir" CACHE STRING "" FORCE)
set(LLVM_INCLUDE_EXAMPLES
OFF
CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_TESTS
OFF
CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_DOCS
OFF
CACHE BOOL "" FORCE)
set(LLVM_ENABLE_BINDINGS
OFF
CACHE BOOL "" FORCE)
set(LLVM_INCLUDE_BENCHMARKS
OFF
CACHE BOOL "" FORCE)
set(LLVM_ENABLE_RTTI
${MGE_ENABLE_RTTI}
CACHE BOOL "" FORCE)
set(LLVM_TARGETS_TO_BUILD
"X86;NVPTX;AArch64;ARM"
CACHE STRING "" FORCE)
set(LLVM_ENABLE_PROJECTS
"mlir"
CACHE STRING "" FORCE)
set(LLVM_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm)

add_mge_mlir_src_dep("third_party/llvm-project")
@@ -91,6 +139,5 @@ set(MLIR_LLVM_INCLUDE_DIR
${PROJECT_SOURCE_DIR}/third_party/llvm-project/llvm/include
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/include
${PROJECT_SOURCE_DIR}/third_party/llvm-project/mlir/include
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include
)
${PROJECT_BINARY_DIR}/third_party/llvm-project/llvm/tools/mlir/include)
set(MLIR_TABLEGEN_EXE mlir-tblgen)
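
external_tablegen_library() above parses keyword arguments with cmake_parse_arguments and expects OUTS as alternating generator-flag / output-file pairs (the while loop pops them two at a time). A hypothetical invocation; the dialect name, .td path and generator flags are illustrative assumptions, not taken from this diff:

  external_tablegen_library(
    NAME mge_dialect_inc                 # interface library to be created
    TBLGEN MLIR                          # project prefix handed to tablegen()
    SRCS dialect/MgeDialect.td
    INCLUDES ${MLIR_LLVM_INCLUDE_DIR}
    OUTS -gen-op-decls MgeDialect.h.inc  # flag/file pairs
         -gen-op-defs MgeDialect.cpp.inc)
  # A consumer (hypothetical target name) then just links the interface target:
  # target_link_libraries(mlir_jit PRIVATE mge_dialect_inc)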

+ 44
- 34
cmake/magicmind.cmake

@@ -1,54 +1,64 @@
find_library(MAGICMIND_LIBRARY
NAMES libmagicmind.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "MAGICMIND library." )
find_library(
MAGICMIND_LIBRARY
NAMES libmagicmind.so
PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "MAGICMIND library.")

if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find MAGICMIND Library")
message(FATAL_ERROR "Can not find MAGICMIND Library")
endif()

get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../../" REALPATH)
find_path(MAGICMIND_INCLUDE_DIR
NAMES common.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root}
PATH_SUFFIXES include
DOC "Path to MAGICMIND include directory." )
find_path(
MAGICMIND_INCLUDE_DIR
NAMES common.h
HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root}
PATH_SUFFIXES include
DOC "Path to MAGICMIND include directory.")

if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find MAGICMIND Library")
message(FATAL_ERROR "Can not find MAGICMIND Library")
endif()

file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR REGEX "^#define MM_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH REGEX "^#define MM_PATCH_VERSION [0-9]+.*$")
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR
REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$")
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR
REGEX "^#define MM_MINOR_VERSION [0-9]+.*$")
file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH
REGEX "^#define MM_PATCH_VERSION [0-9]+.*$")

string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}")
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}")
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}")
set(MAGICMIND_VERSION_STRING "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}")
string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1"
MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}")
string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1"
MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}")
string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1"
MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}")
set(MAGICMIND_VERSION_STRING
"${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}")

add_library(libmagicmind SHARED IMPORTED)

set_target_properties(libmagicmind PROPERTIES
IMPORTED_LOCATION ${MAGICMIND_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR}
)
set_target_properties(
libmagicmind PROPERTIES IMPORTED_LOCATION ${MAGICMIND_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR})

message(STATUS "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})")
message(
STATUS
"Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})"
)

find_library(MAGICMIND_RUNTIME_LIBRARY
NAMES libmagicmind_runtime.so
PATHS "${__found_magicmind_root}/lib64"
)
find_library(
MAGICMIND_RUNTIME_LIBRARY
NAMES libmagicmind_runtime.so
PATHS "${__found_magicmind_root}/lib64")

if(MAGICMIND_RUNTIME_LIBRARY STREQUAL "MAGICMIND_RUNTIME_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library")
message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library")
else()
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}")
message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}")
endif()
add_library(libmagicmind_runtime SHARED IMPORTED)
set_target_properties(libmagicmind_runtime PROPERTIES
IMPORTED_LOCATION ${MAGICMIND_RUNTIME_LIBRARY}
)
set_target_properties(libmagicmind_runtime PROPERTIES IMPORTED_LOCATION
${MAGICMIND_RUNTIME_LIBRARY})
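
Unlike the cudnn module, the version fields here are pulled out with file(STRINGS ... REGEX), which filters matching lines while reading, before REGEX REPLACE strips them down to digits. A self-contained sketch of that filtering step; the demo header written below is fabricated for illustration:

  file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/mm_common_demo.h
       "#define MM_MAJOR_VERSION 1\n#define MM_MINOR_VERSION 0\n")
  # file(STRINGS ... REGEX) returns only the lines matching the pattern.
  file(STRINGS ${CMAKE_CURRENT_BINARY_DIR}/mm_common_demo.h MAGICMIND_MAJOR
       REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$")
  string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1"
         MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}")
  message(STATUS "parsed MM_MAJOR_VERSION: ${MAGICMIND_VERSION_MAJOR}")  # prints 1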

+ 65
- 59
cmake/mkl.cmake

@@ -1,77 +1,83 @@
find_path(MKL_ROOT_DIR
include/mkl_cblas.h
PATHS
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32
$ENV{MKLDIR}
/opt/intel/mkl/*/
/opt/intel/cmkl/*/
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal
)
find_path(
MKL_ROOT_DIR include/mkl_cblas.h
PATHS ${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32/Library
${PROJECT_SOURCE_DIR}/third_party/mkl/x86_32
$ENV{MKLDIR}
/opt/intel/mkl/*/
/opt/intel/cmkl/*/
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal)

if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find MKL")
message(FATAL_ERROR "Can not find MKL")
endif()
message(STATUS "Build with MKL in ${MKL_ROOT_DIR}")

find_path(MKL_INCLUDE_DIR
mkl_cblas.h
PATHS
${MKL_ROOT_DIR}/include
${INCLUDE_INSTALL_DIR}
)
find_path(MKL_INCLUDE_DIR mkl_cblas.h PATHS ${MKL_ROOT_DIR}/include
${INCLUDE_INSTALL_DIR})

option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON)
if(MGE_MKL_USE_STATIC)
find_library(MKL_CORE_LIBRARY
NAMES libmkl_core.a mkl_core.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(
MKL_CORE_LIBRARY
NAMES libmkl_core.a mkl_core.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)

find_library(MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.a mkl_sequential.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(
MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.a mkl_sequential.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)

if(${MGE_ARCH} STREQUAL "x86_64")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "i386")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()
if(${MGE_ARCH} STREQUAL "x86_64")
find_library(
MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.a mkl_intel_ilp64.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "i386")
find_library(
MKL_IPL_LIBRARY
NAMES libmkl_intel_32.a mkl_intel_32.lib mkl_intel_c.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()

add_library(libmkl INTERFACE IMPORTED)
if(UNIX AND NOT APPLE)
target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group)
else()
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY})
endif()
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
add_library(libmkl INTERFACE IMPORTED)
if(UNIX AND NOT APPLE)
target_link_libraries(
libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY}
${MKL_IPL_LIBRARY} -Wl,--end-group)
else()
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY}
${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY})
endif()
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
else()
find_library(MKL_CORE_LIBRARY
NAMES libmkl_core.so libmkl_core.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(
MKL_CORE_LIBRARY
NAMES libmkl_core.so libmkl_core.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)

find_library(MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.so libmkl_sequential.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(
MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.so libmkl_sequential.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)

if(${MGE_ARCH} STREQUAL "x86_64")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "x86_32")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_32.so libmkl_intel_32.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY})
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
if(${MGE_ARCH} STREQUAL "x86_64")
find_library(
MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "x86_32")
find_library(
MKL_IPL_LIBRARY
NAMES libmkl_intel_32.so libmkl_intel_32.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()
target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY}
${MKL_IPL_LIBRARY})
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
endif()

if(${MGE_ARCH} STREQUAL "x86_64")
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64)
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64)
endif()
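
Everything this module discovers is folded into the libmkl interface target, so consumers never reference MKL paths directly. A hedged consumer sketch; the target and source names are illustrative only:

  add_library(my_kernels STATIC matmul.cpp)  # hypothetical target
  # A single link line pulls in the MKL libraries (grouped with
  # --start-group/--end-group on Linux), the MKL include directory,
  # and -DMKL_ILP64 on x86_64 builds.
  target_link_libraries(my_kernels PRIVATE libmkl)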

+ 64
- 56
cmake/protobuf.cmake

@@ -1,70 +1,83 @@
function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR)
if(NOT ARGN)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files")
return()
endif()
if(NOT ARGN)
message(
SEND_ERROR
"Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files")
return()
endif()

set(${SRCS})
set(${HDRS})
foreach(FIL ${ARGN})
set(ABS_FIL ${ROOT_DIR}/${FIL})
get_filename_component(FIL_WE ${FIL} NAME_WE)
get_filename_component(FIL_DIR ${ABS_FIL} PATH)
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR})
set(${SRCS})
set(${HDRS})
foreach(FIL ${ARGN})
set(ABS_FIL ${ROOT_DIR}/${FIL})
get_filename_component(FIL_WE ${FIL} NAME_WE)
get_filename_component(FIL_DIR ${ABS_FIL} PATH)
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR})

list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")

add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS}
DEPENDS ${ABS_FIL} libprotobuf
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}
-I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS}
DEPENDS ${ABS_FIL} libprotobuf
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()

set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS}
${${SRCS}}
PARENT_SCOPE)
set(${HDRS}
${${HDRS}}
PARENT_SCOPE)
endfunction()

if(MGE_USE_SYSTEM_LIB)
find_package(Protobuf)
if(Protobuf_FOUND)
add_library(libprotobuf INTERFACE)
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES})
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS})
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY)
set(PROTOBUF_ROOT ${Protobuf_ROOT})
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS})
return()
endif()
find_package(Protobuf)
if(Protobuf_FOUND)
add_library(libprotobuf INTERFACE)
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES})
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS})
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY)
set(PROTOBUF_ROOT ${Protobuf_ROOT})
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS})
return()
endif()
endif()


include(ExternalProject)
include(GNUInstallDirs)

set(PROTOBUF_DIR "${PROJECT_SOURCE_DIR}/third_party/protobuf" CACHE STRING "protobuf directory")
set(PROTOBUF_DIR
"${PROJECT_SOURCE_DIR}/third_party/protobuf"
CACHE STRING "protobuf directory")
set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf)

if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a)
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a)
else()
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a)
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a)
endif()
set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc)

ExternalProject_add(
protobuf
SOURCE_DIR ${PROTOBUF_DIR}/cmake
PREFIX ${PROTOBUF_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} -Dprotobuf_BUILD_EXAMPLES=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE}
)
ExternalProject_Add(
protobuf
SOURCE_DIR ${PROTOBUF_DIR}/cmake
PREFIX ${PROTOBUF_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR}
-Dprotobuf_BUILD_EXAMPLES=OFF
-Dprotobuf_BUILD_TESTS=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE})

set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${PROTOBUF_INC})
@@ -72,19 +85,14 @@ file(MAKE_DIRECTORY ${PROTOBUF_INC})
add_library(libprotobuf STATIC IMPORTED GLOBAL)
add_dependencies(libprotobuf protobuf)
set_target_properties(
libprotobuf PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include
)
libprotobuf PROPERTIES IMPORTED_LOCATION ${PROTOBUF_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include)

add_executable(protoc IMPORTED GLOBAL)
add_dependencies(protoc protobuf)
set_target_properties(
protoc PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_BUILD_DIR}/bin/protoc
)
set_target_properties(protoc PROPERTIES IMPORTED_LOCATION
${PROTOBUF_BUILD_DIR}/bin/protoc)

set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR})
set(PROTOBUF_PROTOC_EXECUTABLE protoc)
set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include)
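
PROTOBUF_GENERATE_CPP_WITH_ROOT() takes the two output variable names, a root directory, and .proto paths relative to that root, and emits the .pb.cc/.pb.h pair into the current binary directory. A hypothetical call; the proto path and target name are illustrative only:

  PROTOBUF_GENERATE_CPP_WITH_ROOT(PROTO_SRCS PROTO_HDRS
      ${PROJECT_SOURCE_DIR}/src/serialization graph.proto)
  add_library(graph_proto STATIC ${PROTO_SRCS} ${PROTO_HDRS})  # hypothetical target
  target_include_directories(graph_proto PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
  target_link_libraries(graph_proto PUBLIC libprotobuf)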


+ 56
- 45
cmake/rocm.cmake

@@ -1,28 +1,34 @@
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
endif()
if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH
"/opt/rocm/hip"
CACHE PATH "Path to which HIP has been installed")
else()
set(HIP_PATH
$ENV{HIP_PATH}
CACHE PATH "Path to which HIP has been installed")
endif()
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
find_package(HIP QUIET)
if (HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
else()
message(FATAL_ERROR "Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location.")
message(
FATAL_ERROR
"Could not find HIP. Ensure that HIP is either installed in /opt/rocm/hip or the variable HIP_PATH is set to point to the right location."
)
endif()

if (${HIP_VERSION} VERSION_LESS 3.0)
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.")
if(${HIP_VERSION} VERSION_LESS 3.0)
message(FATAL_ERROR "ROCM version needed 3. Please update ROCM.")
endif()

macro(hipconfig_get_option variable option)
if(NOT DEFINED ${variable})
execute_process(
COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option}
OUTPUT_VARIABLE ${variable})
endif()
if(NOT DEFINED ${variable})
execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} ${option}
OUTPUT_VARIABLE ${variable})
endif()
endmacro()

hipconfig_get_option(HIP_COMPILER "--compiler")
@@ -31,30 +37,33 @@ hipconfig_get_option(HIP_CPP_CONFIG "--cpp_config")
separate_arguments(HIP_CPP_CONFIG)

foreach(hip_config_item ${HIP_CPP_CONFIG})
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__")
if(${hip_config_item} STREQUAL "-D${macro_name}=")
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n")
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\
foreach(macro_name "__HIP_PLATFORM_HCC__" "__HIP_ROCclr__")
if(${hip_config_item} STREQUAL "-D${macro_name}=")
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name}\n")
set(HIP_CPP_UNDEFINE
"${HIP_CPP_UNDEFINE}\
#ifdef ${macro_name}\n#undef ${macro_name}\n\
#else\n#error\n\
#endif\n")
elseif(${hip_config_item} STREQUAL "-D${macro_name}")
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n")
set(HIP_CPP_UNDEFINE "${HIP_CPP_UNDEFINE}\
elseif(${hip_config_item} STREQUAL "-D${macro_name}")
set(HIP_CPP_DEFINE "${HIP_CPP_DEFINE}#define ${macro_name} 1\n")
set(HIP_CPP_UNDEFINE
"${HIP_CPP_UNDEFINE}\
#ifdef ${macro_name}\n#undef ${macro_name}\n\
#else\n#error\n\
#endif\n")
endif()
endforeach()
endif()
endforeach()
endforeach()

message(STATUS "Using HIP compiler ${HIP_COMPILER}")

if(${HIP_COMPILER} STREQUAL "hcc")
set(MGE_ROCM_LIBS hip_hcc)
message(WARNING "hcc is not well supported, please modify link.txt to link with hipcc")
elseif (${HIP_COMPILER} STREQUAL "clang")
set(MGE_ROCM_LIBS amdhip64)
set(MGE_ROCM_LIBS hip_hcc)
message(
WARNING "hcc is not well supported, please modify link.txt to link with hipcc")
elseif(${HIP_COMPILER} STREQUAL "clang")
set(MGE_ROCM_LIBS amdhip64)
endif()

list(APPEND MGE_ROCM_LIBS amdocl64 MIOpen rocblas rocrand)
@@ -63,26 +72,28 @@ set(HIP_INCLUDE_DIR ${HIP_ROOT_DIR}/../include)
set(HIP_LIBRARY_DIR ${HIP_ROOT_DIR}/../lib)

function(find_rocm_library name dirname include library)
find_path(${name}_LIBRARY_DIR
NAMES ${library}
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}"
PATH_SUFFIXES lib lib/x86_64
DOC "Path to ${name} library directory")
find_path(
${name}_LIBRARY_DIR
NAMES ${library}
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}"
PATH_SUFFIXES lib lib/x86_64
DOC "Path to ${name} library directory")

if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$")
message(FATAL_ERROR "Can not find ${name} library")
endif()
if(${${name}_LIBRARY_DIR} MATCHES "NOTFOUND$")
message(FATAL_ERROR "Can not find ${name} library")
endif()

find_path(${name}_INCLUDE_DIR
NAMES ${include}
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}"
PATH_SUFFIXES include
DOC "Path to ${name} include directory")
find_path(
${name}_INCLUDE_DIR
NAMES ${include}
HINTS "${${name}_ROOT_DIR}" "${HIP_ROOT_DIR}/../${dirname}"
PATH_SUFFIXES include
DOC "Path to ${name} include directory")

if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$")
message(FATAL_ERROR "Can not find ${name} include")
endif()
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}")
if(${name}_INCLUDE_DIR MATCHES "NOTFOUND$")
message(FATAL_ERROR "Can not find ${name} include")
endif()
message(DEBUG "Found lib ${${name}_LIBRARY_DIR}, include ${${name}_INCLUDE_DIR}")
endfunction()

find_rocm_library(MIOPEN miopen miopen libMIOpen.so)
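
find_rocm_library() takes positional (name dirname include library) arguments, as in the MIOpen call above. Follow-up calls for the other components listed in MGE_ROCM_LIBS would presumably look like the lines below; the header and library names are assumptions, not taken from this diff:

  find_rocm_library(ROCBLAS rocblas rocblas.h librocblas.so)  # assumed names
  find_rocm_library(ROCRAND rocrand rocrand.h librocrand.so)  # assumed names
  # Each call defines <name>_LIBRARY_DIR and <name>_INCLUDE_DIR or fails fatally.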


+ 153
- 130
cmake/tensorrt.cmake

@@ -1,166 +1,189 @@
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "")
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR})
if("${TRT_ROOT_DIR}" STREQUAL "" AND NOT "$ENV{TRT_ROOT_DIR}" STREQUAL "")
set(TRT_ROOT_DIR $ENV{TRT_ROOT_DIR})
endif()

if(MGE_CUDA_USE_STATIC)
find_library(TRT_LIBRARY
NAMES libnvinfer_static.a nvinfer.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library." )
find_library(TRT_PLUGIN_LIBRARY
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT plugin library." )
find_library(
TRT_LIBRARY
NAMES libnvinfer_static.a nvinfer.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library.")
find_library(
TRT_PLUGIN_LIBRARY
NAMES libnvinfer_plugin_static.a nvinfer_plugin.lib
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT plugin library.")
else()
find_library(TRT_LIBRARY
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library." )
find_library(TRT_PLUGIN_LIBRARY
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT plugin library." )
find_library(
TRT_LIBRARY
NAMES libnvinfer.so libnvinfer.dylib nvinfer.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library.")
find_library(
TRT_PLUGIN_LIBRARY
NAMES libnvinfer_plugin.so libnvinfer_plugin.dylib nvinfer_plugin.dll
PATHS ${ALTER_LD_LIBRARY_PATHS} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${ALTER_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT plugin library.")
endif()

if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
message(
FATAL_ERROR
"Can not find TensorRT Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env"
)
endif()
if(TRT_PLUGIN_LIBRARY STREQUAL "TRT_PLUGIN_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
message(
FATAL_ERROR
"Can not find TensorRT Plugin Library, please refer to scripts/cmake-build/BUILD_README.md to init TRT env"
)
endif()

get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH)
find_path(TRT_INCLUDE_DIR
NAMES NvInfer.h
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
PATH_SUFFIXES include
DOC "Path to TRT include directory." )
find_path(TRT_PLUGIN_INCLUDE_DIR
NAMES NvInferPlugin.h
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
PATH_SUFFIXES include
DOC "Path to TRT plugin include directory." )
find_path(
TRT_INCLUDE_DIR
NAMES NvInfer.h
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
PATH_SUFFIXES include
DOC "Path to TRT include directory.")
find_path(
TRT_PLUGIN_INCLUDE_DIR
NAMES NvInferPlugin.h
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
PATH_SUFFIXES include
DOC "Path to TRT plugin include directory.")

if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
message(
FATAL_ERROR
"Can not find TensorRT INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env"
)
endif()
if(TRT_PLUGIN_INCLUDE_DIR STREQUAL "TRT_PLUGIN_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env")
message(
FATAL_ERROR
"Can not find TensorRT Plugin INCLUDE, please refer to scripts/cmake-build/BUILD_README.md to init TRT env"
)
endif()

file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")

if (TensorRT_MAJOR STREQUAL "")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
if(TensorRT_MAJOR STREQUAL "")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR
REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR
REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH
REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
endif()

string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1"
TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1"
TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1"
TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
set(TRT_VERSION_STRING
"${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")

if(MGE_CUDA_USE_STATIC)
add_library(libnvinfer STATIC IMPORTED)
add_library(libnvinfer_plugin STATIC IMPORTED)
add_library(libnvinfer STATIC IMPORTED)
add_library(libnvinfer_plugin STATIC IMPORTED)
else()
add_library(libnvinfer SHARED IMPORTED)
add_library(libnvinfer_plugin SHARED IMPORTED)
add_library(libnvinfer SHARED IMPORTED)
add_library(libnvinfer_plugin SHARED IMPORTED)
endif()

set_target_properties(libnvinfer PROPERTIES
IMPORTED_LOCATION ${TRT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR}
)
set_target_properties(libnvinfer_plugin PROPERTIES
IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR}
)
set_target_properties(
libnvinfer PROPERTIES IMPORTED_LOCATION ${TRT_LIBRARY} INTERFACE_INCLUDE_DIRECTORIES
${TRT_INCLUDE_DIR})
set_target_properties(
libnvinfer_plugin PROPERTIES IMPORTED_LOCATION ${TRT_PLUGIN_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${TRT_PLUGIN_INCLUDE_DIR})

message(STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")
message(
STATUS "Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")

if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
if(MGE_CUDA_USE_STATIC)
find_library(LIBMYELIN_COMPILER
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib
PATHS ${__found_trt_root}/lib
)
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library")
else()
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}")
endif()
add_library(libmyelin_compiler STATIC IMPORTED)
set_target_properties(libmyelin_compiler PROPERTIES
IMPORTED_LOCATION ${LIBMYELIN_COMPILER}
)
if(MGE_CUDA_USE_STATIC)
find_library(
LIBMYELIN_COMPILER
NAMES libmyelin_compiler_static.a myelin_compiler_static.lib
PATHS ${__found_trt_root}/lib)
if(LIBMYELIN_COMPILER STREQUAL "LIBMYELIN_COMPILER-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_COMPILER Library")
else()
message(STATUS "Found TensorRT myelin_compiler: ${LIBMYELIN_COMPILER}")
endif()
add_library(libmyelin_compiler STATIC IMPORTED)
set_target_properties(libmyelin_compiler PROPERTIES IMPORTED_LOCATION
${LIBMYELIN_COMPILER})

find_library(LIBMYELIN_EXECUTOR
NAMES libmyelin_executor_static.a myelin_executor_static.lib
PATHS ${__found_trt_root}/lib
)
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library")
else()
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}")
endif()
add_library(libmyelin_executor STATIC IMPORTED)
set_target_properties(libmyelin_executor PROPERTIES
IMPORTED_LOCATION ${LIBMYELIN_EXECUTOR}
)
find_library(
LIBMYELIN_EXECUTOR
NAMES libmyelin_executor_static.a myelin_executor_static.lib
PATHS ${__found_trt_root}/lib)
if(LIBMYELIN_EXECUTOR STREQUAL "LIBMYELIN_EXECUTOR-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_EXECUTOR Library")
else()
message(STATUS "Found TensorRT libmyelin_executor: ${LIBMYELIN_EXECUTOR}")
endif()
add_library(libmyelin_executor STATIC IMPORTED)
set_target_properties(libmyelin_executor PROPERTIES IMPORTED_LOCATION
${LIBMYELIN_EXECUTOR})

find_library(LIBMYELIN_PATTERN_RUNTIME
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib
PATHS ${__found_trt_root}/lib
)
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library")
else()
message(STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}")
endif()
add_library(libmyelin_pattern_runtime STATIC IMPORTED)
set_target_properties(libmyelin_pattern_runtime PROPERTIES
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME}
)
find_library(
LIBMYELIN_PATTERN_RUNTIME
NAMES libmyelin_pattern_runtime_static.a myelin_pattern_runtime_static.lib
PATHS ${__found_trt_root}/lib)
if(LIBMYELIN_PATTERN_RUNTIME STREQUAL "LIBMYELIN_PATTERN_RUNTIME-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_RUNTIME Library")
else()
message(
STATUS "Found TensorRT libmyelin_pattern_runtime: ${LIBMYELIN_PATTERN_RUNTIME}")
endif()
add_library(libmyelin_pattern_runtime STATIC IMPORTED)
set_target_properties(libmyelin_pattern_runtime
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_RUNTIME})

find_library(LIBMYELIN_PATTERN_LIBRARY
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib
PATHS ${__found_trt_root}/lib
)
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library")
else()
message(STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}")
endif()
add_library(libmyelin_pattern_library STATIC IMPORTED)
set_target_properties(libmyelin_pattern_library PROPERTIES
IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY}
)
find_library(
LIBMYELIN_PATTERN_LIBRARY
NAMES libmyelin_pattern_library_static.a myelin_pattern_library_static.lib
PATHS ${__found_trt_root}/lib)
if(LIBMYELIN_PATTERN_LIBRARY STREQUAL "LIBMYELIN_PATTERN_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_PATTERN_LIBRARY Library")
else()
find_library(LIBMYELIN_SHARED
NAMES libmyelin.so myelin.dll
PATHS ${__found_trt_root}/lib
)
message(
STATUS "Found TensorRT libmyelin_pattern_library: ${LIBMYELIN_PATTERN_LIBRARY}")
endif()
add_library(libmyelin_pattern_library STATIC IMPORTED)
set_target_properties(libmyelin_pattern_library
PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_PATTERN_LIBRARY})
else()
find_library(
LIBMYELIN_SHARED
NAMES libmyelin.so myelin.dll
PATHS ${__found_trt_root}/lib)

if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library")
else()
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}")
endif()
add_library(libmyelin SHARED IMPORTED)
set_target_properties(libmyelin PROPERTIES
IMPORTED_LOCATION ${LIBMYELIN_SHARED}
)
if(LIBMYELIN_SHARED STREQUAL "LIBMYELIN_SHARED-NOTFOUND")
message(FATAL_ERROR "Can not find LIBMYELIN_SHARED Library")
else()
message(STATUS "Found TensorRT libmyelin_shared: ${LIBMYELIN_SHARED}")
endif()
add_library(libmyelin SHARED IMPORTED)
set_target_properties(libmyelin PROPERTIES IMPORTED_LOCATION ${LIBMYELIN_SHARED})
endif()
endif()

+ 19
- 13
cmake/zmq.cmake

@@ -1,17 +1,26 @@
include(ExternalProject)
include(GNUInstallDirs)

set(ZMQ_DIR ${PROJECT_SOURCE_DIR}/third_party/libzmq CACHE STRING "ZMQ directory")
set(ZMQ_DIR
${PROJECT_SOURCE_DIR}/third_party/libzmq
CACHE STRING "ZMQ directory")
set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq)
set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a)

ExternalProject_add(
zmq
SOURCE_DIR ${ZMQ_DIR}
PREFIX ${ZMQ_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} -DWITH_PERF_TOOL=OFF -DZMQ_BUILD_TESTS=OFF -DENABLE_CPACK=OFF -DENABLE_CURVE=OFF
BUILD_BYPRODUCTS ${ZMQ_LIB}
)
ExternalProject_Add(
zmq
SOURCE_DIR ${ZMQ_DIR}
PREFIX ${ZMQ_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
-DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR}
-DWITH_PERF_TOOL=OFF
-DZMQ_BUILD_TESTS=OFF
-DENABLE_CPACK=OFF
-DENABLE_CURVE=OFF
BUILD_BYPRODUCTS ${ZMQ_LIB})

set(ZMQ_INC ${ZMQ_BUILD_DIR}/include)
include_directories(${ZMQ_INC})
@@ -19,8 +28,5 @@ file(MAKE_DIRECTORY ${ZMQ_INC})

add_library(libzmq STATIC IMPORTED GLOBAL)
add_dependencies(libzmq zmq)
set_target_properties(
libzmq PROPERTIES
IMPORTED_LOCATION ${ZMQ_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC}
)
set_target_properties(libzmq PROPERTIES IMPORTED_LOCATION ${ZMQ_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC})
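
Because libzmq is declared IMPORTED GLOBAL and given a dependency on the zmq ExternalProject, a consumer only needs the target name. A hedged sketch; the executable name is illustrative:

  add_executable(rpc_demo rpc_demo.cpp)  # hypothetical target
  # Links ${ZMQ_LIB} and picks up ${ZMQ_INC}; the dependency added to the
  # imported libzmq target above is followed transitively (CMake 3.3+), so the
  # external zmq build finishes before rpc_demo links.
  target_link_libraries(rpc_demo PRIVATE libzmq)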

+ 32
- 37
dnn/CMakeLists.txt

@@ -4,66 +4,61 @@ set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py)
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/)
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn)
add_custom_command(
OUTPUT
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS}
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS}
tmp_unuse.log --write-cppjson ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM
)

list(APPEND OPR_PARAM_DEFS_OUTS
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
COMMAND
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
${OPR_PARAM_DEFS_SRCS} tmp_unuse.log --write-cppjson
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
)
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM)

list(APPEND OPR_PARAM_DEFS_OUTS ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})

set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common)
add_custom_command(
OUTPUT
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
--enumv ${OPR_PARAM_DEFS_SRCS}
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM
)
OUTPUT ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
COMMAND
${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} --enumv
${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM)

list(APPEND OPR_PARAM_DEFS_OUTS
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
)
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})

install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} FILES_MATCHING PATTERN "*.h")
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING
PATTERN "*.h")

add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS})
add_library(opr_param_defs INTERFACE)
target_include_directories(opr_param_defs
INTERFACE
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
foreach (INCPATH IN LISTS OPR_PARAM_DEFS_INC)
target_include_directories(opr_param_defs
INTERFACE $<BUILD_INTERFACE:${INCPATH}>
)
INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
foreach(INCPATH IN LISTS OPR_PARAM_DEFS_INC)
target_include_directories(opr_param_defs INTERFACE $<BUILD_INTERFACE:${INCPATH}>)
endforeach()

add_dependencies(opr_param_defs _opr_param_defs)
install(TARGETS opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})

if(MGE_WITH_CUDA)
add_library(cutlass INTERFACE)
target_include_directories(cutlass
INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
add_library(cutlass INTERFACE)
target_include_directories(
cutlass
INTERFACE $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/cutlass/include>)
endif()

if(MGE_WITH_TEST)
add_subdirectory(test)
add_subdirectory(test)
endif()

add_subdirectory(src)


+ 4
- 2
dnn/atlas-stub/CMakeLists.txt

@@ -1,6 +1,8 @@
add_library(atlas-stub STATIC src/libatlas-wrap.cpp)
target_include_directories(atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(
atlas-stub PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
install(TARGETS atlas-stub EXPORT ${MGE_EXPORT_TARGETS})

add_library(acl-cblas STATIC src/libacl_cblas-wrap.cpp)
target_include_directories(acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(
acl-cblas PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)

+ 12
- 11
dnn/cuda-stub/CMakeLists.txt

@@ -1,26 +1,27 @@
file (GLOB_RECURSE CUDA_STUB src/libcuda.cpp)
file (GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp)
file(GLOB_RECURSE CUDA_STUB src/libcuda.cpp)
file(GLOB_RECURSE NVRTC_STUB src/libnvrtc.cpp)

if(MGE_WITH_CUDA_STUB)
list(APPEND STUB_SRC ${CUDA_STUB})
list(APPEND STUB_SRC ${CUDA_STUB})
endif()

if(MGE_WITH_NVRTC_STUB)
list(APPEND STUB_SRC ${NVRTC_STUB})
list(APPEND STUB_SRC ${NVRTC_STUB})
endif()

if(MSVC OR WIN32)
add_library (cuda-stub STATIC ${STUB_SRC})
add_library(cuda-stub STATIC ${STUB_SRC})
else()
add_library (cuda-stub SHARED ${STUB_SRC})
add_library(cuda-stub SHARED ${STUB_SRC})
endif()

set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda_stub)
target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL)
if (MSVC OR WIN32)
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined)
if(MSVC OR WIN32)
target_link_libraries(cuda-stub PRIVATE -Wl,--no-undefined)
else()
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined)
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined)
endif()
target_include_directories(cuda-stub PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>)
install (TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS})
target_include_directories(cuda-stub
PRIVATE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>)
install(TARGETS cuda-stub EXPORT ${MGE_EXPORT_TARGETS})

+ 6
- 0
dnn/include/megdnn/common.h

@@ -12,6 +12,7 @@
#pragma once

#include "megbrain_build_config.h"
#include "megdnn/oprs/base.h"

#if MGB_ENABLE_GETENV
#define MGB_GETENV ::std::getenv
@@ -36,6 +37,11 @@ bool has_available_algo(Opr* opr, Args&&... args) {
return !all_algos.empty();
}

template <class Opr, typename... Args>
bool has_no_naive_heuristic_algo(Opr* opr, Args&&... args) {
auto&& algo = opr->get_algorithm_info_heuristic(std::forward<Args>(args)...);
return !static_cast<bool>(algo.attribute & detail::Algorithm::Attribute::NAIVE);
}
} // namespace megdnn

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}

+ 113
- 0
dnn/include/megdnn/oprs/nn.h

@@ -1936,6 +1936,119 @@ protected:
const TensorLayout& grad_s, size_t workspace_in_bytes);
};

class LayerNormBase : public OperatorBase {
DEF_OPR_IMPL_CTOR(LayerNormBase, OperatorBase);
DEF_OPR_PARAM(LayerNorm);

protected:
void deduce_layout_fwd(
const TensorLayout& data, const TensorLayout& weight,
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean,
TensorLayout& rstd);
void check_layout_fwd(
const TensorLayout& data, const TensorLayout& weight,
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean,
const TensorLayout& rstd);
};

class LayerNormForward : public LayerNormBase {
DEF_OPR_IMPL(LayerNormForward, LayerNormBase, 3, 3);

public:
virtual void exec(
_megdnn_tensor_in data, _megdnn_tensor_in weight, _megdnn_tensor_in bias,
_megdnn_tensor_out dst, _megdnn_tensor_out mean, _megdnn_tensor_out rstd,
_megdnn_workspace workspace) = 0;
void deduce_layout(
const TensorLayout& data, const TensorLayout& weight,
const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean,
TensorLayout& rstd);
virtual size_t get_workspace_in_bytes(
const TensorLayout& data, const TensorLayout& weight,
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean,
const TensorLayout& rstd) = 0;

protected:
void check_exec(
const TensorLayout& data, const TensorLayout& weight,
const TensorLayout& bias, const TensorLayout& dst, const TensorLayout& mean,
const TensorLayout& rstd, size_t workspace_in_bytes);
};
using LayerNorm = LayerNormForward;

class LayerNormBackward : public LayerNormBase {
DEF_OPR_IMPL(LayerNormBackward, LayerNormBase, 5, 3);

public:
virtual void exec(
_megdnn_tensor_in diff, _megdnn_tensor_in data, _megdnn_tensor_in weight,
_megdnn_tensor_in mean, _megdnn_tensor_in rstd, _megdnn_tensor_out ddata,
_megdnn_tensor_out dweight, _megdnn_tensor_out dbias,
_megdnn_workspace workspace) = 0;
void deduce_layout(
const TensorLayout& diff, const TensorLayout& data,
const TensorLayout& weight, const TensorLayout& mean,
const TensorLayout& rstd, TensorLayout& ddata, TensorLayout& dweight,
TensorLayout& dbias);
virtual size_t get_workspace_in_bytes(
const TensorLayout& diff, const TensorLayout& data,
const TensorLayout& weight, const TensorLayout& mean,
const TensorLayout& rstd, const TensorLayout& ddata,
const TensorLayout& dweight, const TensorLayout& dbias) = 0;

protected:
void check_exec(
const TensorLayout& diff, const TensorLayout& data,
const TensorLayout& weight, const TensorLayout& mean,
const TensorLayout& rstd, const TensorLayout& ddata,
const TensorLayout& dweight, const TensorLayout& dbias,
size_t workspace_in_bytes);
};

class DropoutBase : public OperatorBase {
DEF_OPR_IMPL_CTOR(DropoutBase, OperatorBase);
DEF_OPR_PARAM(Dropout);
};

class DropoutForward : public DropoutBase {
DEF_OPR_IMPL(DropoutForward, DropoutBase, 1, 2);

public:
void deduce_layout(const TensorLayout& inp, TensorLayout& oup, TensorLayout& mask);
virtual void exec(
_megdnn_tensor_in inp, _megdnn_tensor_out oup, _megdnn_tensor_out mask,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(
const TensorLayout& inp, const TensorLayout& oup,
const TensorLayout& mask) = 0;
virtual size_t get_mask_size_in_bytes(const TensorLayout& inp) = 0;

protected:
void check_exec(
const TensorLayout& inp, const TensorLayout& oup, const TensorLayout& mask,
size_t workspace_in_bytes);
};
using Dropout = DropoutForward;

class DropoutBackward : public DropoutBase {
DEF_OPR_IMPL(DropoutBackward, DropoutBase, 2, 1);

public:
void deduce_layout(
const TensorLayout& doup, const TensorLayout& mask, TensorLayout& dinp);
virtual void exec(
_megdnn_tensor_in doup, _megdnn_tensor_in mask, _megdnn_tensor_out dinp,
_megdnn_workspace workspace) = 0;
virtual size_t get_workspace_in_bytes(
const TensorLayout& doup, const TensorLayout& mask,
const TensorLayout& dinp) = 0;

protected:
void check_exec(
const TensorLayout& doup, const TensorLayout& mask,
const TensorLayout& dinp, size_t workspace_in_bytes);
};

} // namespace megdnn
#include "megdnn/internal/opr_header_epilogue.h"

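DropoutForward writes both the dropped output and a mask (sized via get_mask_size_in_bytes), and DropoutBackward replays that mask on the incoming gradient. A standalone sketch under the assumptions of inverted-dropout scaling by 1/(1 - drop_prob) and a one-byte-per-element mask; neither detail is fixed by the header itself:

#include <cstdint>
#include <random>
#include <vector>

void dropout_fwd(const std::vector<float>& inp, float drop_prob, uint64_t seed,
                 std::vector<float>& oup, std::vector<uint8_t>& mask) {
    std::mt19937_64 rng(seed);
    std::bernoulli_distribution keep(1.0 - drop_prob);
    oup.resize(inp.size());
    mask.resize(inp.size());
    float scale = drop_prob < 1.f ? 1.f / (1.f - drop_prob) : 0.f;
    for (size_t i = 0; i < inp.size(); ++i) {
        mask[i] = keep(rng) ? 1 : 0;                 // remember which elements survived
        oup[i] = mask[i] ? inp[i] * scale : 0.f;     // rescale the survivors
    }
}

void dropout_bwd(const std::vector<float>& doup, const std::vector<uint8_t>& mask,
                 float drop_prob, std::vector<float>& dinp) {
    dinp.resize(doup.size());
    float scale = drop_prob < 1.f ? 1.f / (1.f - drop_prob) : 0.f;
    for (size_t i = 0; i < doup.size(); ++i)
        dinp[i] = mask[i] ? doup[i] * scale : 0.f;   // gradient only flows where kept
}

int main() {
    std::vector<float> x{1.f, 2.f, 3.f, 4.f}, y, dx;
    std::vector<uint8_t> m;
    dropout_fwd(x, 0.5f, 123, y, m);
    dropout_bwd(y, m, 0.5f, dx);
    return 0;
}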


+ 12
- 0
dnn/scripts/opr_param_defs.py View File

@@ -1212,3 +1212,15 @@ PADDING_MODES = [Doc('REPLICATE = 0', 'aaaaaa|abcdefgh|hhhhhhh'),
member_alias=[(i, 'PADDING_{}'.format(i)) for i in PADDING_MODES]
)
)

(pdef('LayerNorm')
.add_fields('bool', 'affine', 'true')
.add_fields('float32', 'eps', '1e-5f')
.add_fields('uint64', 'normalized_dim', '1')
.add_fields('uint64', 'normalized_size', '1')
)

(pdef('Dropout')
.add_fields('float32', 'drop_prob', '0')
.add_fields('uint64', 'seed', '0')
)

+ 221
- 195
dnn/src/CMakeLists.txt View File

@@ -5,168 +5,190 @@ file(GLOB_RECURSE SOURCES common/*.cpp naive/*.cpp)
list(APPEND SOURCES ${PROJECT_BINARY_DIR}/genfiles/megbrain_build_config.h)

if(NOT ${MGE_ARCH} STREQUAL "naive")
file(GLOB_RECURSE SOURCES_ fallback/*.cpp)
file(GLOB_RECURSE SOURCES_ fallback/*.cpp)
list(APPEND SOURCES ${SOURCES_})
if(${MGE_ARCH} STREQUAL "fallback")
message(WARNING "build only with fallback")
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
file(GLOB_RECURSE SOURCES_ x86/*.cpp)
list(APPEND SOURCES ${SOURCES_})
if(${MGE_ARCH} STREQUAL "fallback")
message(WARNING "build only with fallback")
elseif(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
file(GLOB_RECURSE SOURCES_ x86/*.cpp)
list(APPEND SOURCES ${SOURCES_})
if(NOT MSVC)
file(GLOB_RECURSE SOURCES_ x86/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
endif()
elseif(${MGE_ARCH} STREQUAL "armv7")
file(GLOB_RECURSE SOURCES_ armv7/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ armv7/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
elseif(${MGE_ARCH} STREQUAL "aarch64")
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ aarch64/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
if(NOT MSVC)
file(GLOB_RECURSE SOURCES_ x86/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
endif()
elseif(${MGE_ARCH} STREQUAL "armv7")
file(GLOB_RECURSE SOURCES_ armv7/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ armv7/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
elseif(${MGE_ARCH} STREQUAL "aarch64")
file(GLOB_RECURSE SOURCES_ aarch64/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ arm_common/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ aarch64/*.S)
set_source_files_properties(${SOURCES_} PROPERTIES LANGUAGE C)
list(APPEND SOURCES ${SOURCES_})
endif()
endif()

if(MGE_WITH_MIDOUT_PROFILE)
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp)
list(APPEND SOURCES ${PROJECT_SOURCE_DIR}/third_party/midout/src/midout.cpp)
endif()

###############################################################################
# ######################################################################################
# HIP_COMPILE
###############################################################################
macro (HIP_COMPILE _hip_target _hip_objs)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources
_cmake_options
_hipcc_options
_hcc_options
_nvcc_options
${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${_hip_target}
OBJ _generated_files _source_files ${_sources} ${_cmake_options}
HIPCC_OPTIONS ${_hipcc_options}
HCC_OPTIONS ${_hcc_options}
NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
# ######################################################################################
macro(HIP_COMPILE _hip_target _hip_objs)
# Separate the sources from the options
hip_get_sources_and_options(_sources _cmake_options _hipcc_options _hcc_options
_nvcc_options ${ARGN})
hip_prepare_target_commands(
${_hip_target}
OBJ
_generated_files
_source_files
${_sources}
${_cmake_options}
HIPCC_OPTIONS
${_hipcc_options}
HCC_OPTIONS
${_hcc_options}
NVCC_OPTIONS
${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()

add_custom_target(${_hip_target})
add_custom_target(${_hip_target})

# set return value
set(${_hip_objs} ${_generated_files})
# set return value
set(${_hip_objs} ${_generated_files})
endmacro()

if (MGE_WITH_ROCM)
file (GLOB_RECURSE SOURCES_ rocm/*.cpp)
list (APPEND SOURCES ${SOURCES_})

# FIXME rocm may lost the first hip file, so currently we just create an
# empty file to bypass this error.
file(GLOB start.cpp.hip "" )
list(APPEND HIP_SOURCES start.cpp.hip)
configure_file(
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h)

configure_file(
${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h)

file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip)
set(HIP_TARGET_NAME megdnn_hip_kernel)
set(_HIPCC_OPTIONS "-fPIC")
set(_HCC_OPTIONS "-fPIC")
set(_NVCC_OPTIONS "-fPIC")

list(APPEND HIP_SOURCES ${HIP_SOURCES_})
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
HIP_INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/dnn
${PROJECT_SOURCE_DIR}/dnn/include
${PROJECT_BINARY_DIR}/dnn
${PROJECT_BINARY_DIR}/genfiles
${PROJECT_BINARY_DIR}/dnn/include
${HIP_INCLUDE_DIR}
${MIOPEN_INCLUDE_DIR}
${ROCBLAS_INCLUDE_DIR}
${ROCRAND_INCLUDE_DIR}
${AMDOCL_INCLUDE_DIR})
hip_compile(
${HIP_TARGET_NAME} HIPOBJS ${HIP_SOURCES}
HIPCC_OPTIONS ${_HIPCC_OPTIONS}
HCC_OPTIONS ${_HCC_OPTIONS}
NVCC_OPTIONS ${_NVCC_OPTIONS})
list(APPEND SOURCES ${HIPOBJS})
endif ()
if(MGE_WITH_ROCM)
file(GLOB_RECURSE SOURCES_ rocm/*.cpp)
list(APPEND SOURCES ${SOURCES_})

# FIXME rocm may lost the first hip file, so currently we just create an empty file to
# bypass this error.
file(GLOB start.cpp.hip "")
list(APPEND HIP_SOURCES start.cpp.hip)
configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h.in
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_prologue.h)

configure_file(${PROJECT_SOURCE_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h.in
${PROJECT_BINARY_DIR}/dnn/include/hcc_detail/hcc_defs_epilogue.h)

file(GLOB_RECURSE HIP_SOURCES_ rocm/*.cpp.hip)
set(HIP_TARGET_NAME megdnn_hip_kernel)
set(_HIPCC_OPTIONS "-fPIC")
set(_HCC_OPTIONS "-fPIC")
set(_NVCC_OPTIONS "-fPIC")

list(APPEND HIP_SOURCES ${HIP_SOURCES_})
set_source_files_properties(${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_include_directories(
${PROJECT_SOURCE_DIR}/dnn
${PROJECT_SOURCE_DIR}/dnn/include
${PROJECT_BINARY_DIR}/dnn
${PROJECT_BINARY_DIR}/genfiles
${PROJECT_BINARY_DIR}/dnn/include
${HIP_INCLUDE_DIR}
${MIOPEN_INCLUDE_DIR}
${ROCBLAS_INCLUDE_DIR}
${ROCRAND_INCLUDE_DIR}
${AMDOCL_INCLUDE_DIR})
hip_compile(
${HIP_TARGET_NAME}
HIPOBJS
${HIP_SOURCES}
HIPCC_OPTIONS
${_HIPCC_OPTIONS}
HCC_OPTIONS
${_HCC_OPTIONS}
NVCC_OPTIONS
${_NVCC_OPTIONS})
list(APPEND SOURCES ${HIPOBJS})
endif()

if(MGE_WITH_CUDA)
file(GLOB_RECURSE SOURCES_ cuda/*.cpp)
list(APPEND SOURCES ${SOURCES_})
file(GLOB_RECURSE SOURCES_ cuda/*.cpp)
list(APPEND SOURCES ${SOURCES_})

file(GLOB_RECURSE CUSOURCES cuda/*.cu)

set(CUTLASS_GEN_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py)
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated)
set(CUTLASS_SOURCES "")
function(gen_cutlass_kimpl op type gen_files)
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage)
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type})
set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT})
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR})
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR})
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR})
execute_process(
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations ${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR}
RESULT_VARIABLE gen_cutlass_result
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log
)
if (NOT gen_cutlass_result EQUAL 0)
message(FATAL_ERROR "Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log")
endif()
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/" "${CURRENT_CUTLASS_GEN_DIR}/*.cu")
foreach(FILE ${CUTLASS_GEN_FILES})
if (NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}")
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}")
endif()
endforeach()
file(GLOB CUTLASS_GEN_FILES RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}" "${CURRENT_CUTLASS_STAGE_DIR}/*.cu")
foreach(FILE ${CUTLASS_GEN_FILES})
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}")
endforeach()
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR})
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu")
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES})
set(${gen_files} "${${gen_files}}" PARENT_SCOPE)
endfunction()
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES)
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES)
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES)
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES)
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES)
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES)
list(APPEND SOURCES ${CUTLASS_SOURCES})
list(APPEND SOURCES ${CUSOURCES})
file(GLOB_RECURSE CUSOURCES cuda/*.cu)

set(CUTLASS_GEN_SCRIPT
${CMAKE_CURRENT_SOURCE_DIR}/../scripts/cutlass_generator/generator.py)
set(CUTLASS_GEN_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuda/cutlass/generated)
set(CUTLASS_SOURCES "")
function(gen_cutlass_kimpl op type gen_files)
set(CURRENT_CUTLASS_STAGE_DIR ${CUTLASS_GEN_DIR}/${op}_${type}.stage)
set(CURRENT_CUTLASS_GEN_DIR ${CUTLASS_GEN_DIR}/${op}_${type})

set_directory_properties(PROPERTIES CMAKE_CONFIGURE_DEPENDS ${CUTLASS_GEN_SCRIPT})

file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR})
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_STAGE_DIR})
file(MAKE_DIRECTORY ${CURRENT_CUTLASS_GEN_DIR})
execute_process(
COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${CUTLASS_GEN_SCRIPT} --operations
${op} --type ${type} ${CURRENT_CUTLASS_STAGE_DIR}
RESULT_VARIABLE gen_cutlass_result
OUTPUT_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log
ERROR_FILE ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log)
if(NOT gen_cutlass_result EQUAL 0)
message(
FATAL_ERROR
"Error generating library instances. See ${CURRENT_CUTLASS_GEN_DIR}/gen_cutlass.log"
)
endif()
file(
GLOB CUTLASS_GEN_FILES
RELATIVE "${CURRENT_CUTLASS_GEN_DIR}/"
"${CURRENT_CUTLASS_GEN_DIR}/*.cu")
foreach(FILE ${CUTLASS_GEN_FILES})
if(NOT EXISTS "${CURRENT_CUTLASS_STAGE_DIR}/${FILE}")
file(REMOVE "${CURRENT_CUTLASS_GEN_DIR}/${FILE}")
endif()
endforeach()
file(
GLOB CUTLASS_GEN_FILES
RELATIVE "${CURRENT_CUTLASS_STAGE_DIR}"
"${CURRENT_CUTLASS_STAGE_DIR}/*.cu")
foreach(FILE ${CUTLASS_GEN_FILES})
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${CURRENT_CUTLASS_STAGE_DIR}/${FILE}" "${CURRENT_CUTLASS_GEN_DIR}")
endforeach()
file(REMOVE_RECURSE ${CURRENT_CUTLASS_STAGE_DIR})
file(GLOB_RECURSE CUTLASS_GEN_FILES "${CURRENT_CUTLASS_GEN_DIR}/*.cu")
list(APPEND ${gen_files} ${CUTLASS_GEN_FILES})
set(${gen_files}
"${${gen_files}}"
PARENT_SCOPE)
endfunction()
gen_cutlass_kimpl(gemm simt CUTLASS_SOURCES)
gen_cutlass_kimpl(gemm tensorop884 CUTLASS_SOURCES)
gen_cutlass_kimpl(gemm tensorop1688 CUTLASS_SOURCES)
gen_cutlass_kimpl(gemv simt CUTLASS_SOURCES)
gen_cutlass_kimpl(deconv simt CUTLASS_SOURCES)
gen_cutlass_kimpl(deconv tensorop8816 CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d simt CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d tensorop8816 CUTLASS_SOURCES)
gen_cutlass_kimpl(conv2d tensorop8832 CUTLASS_SOURCES)
list(APPEND SOURCES ${CUTLASS_SOURCES})
list(APPEND SOURCES ${CUSOURCES})
endif()

if(MGE_WITH_ATLAS)
file(GLOB_RECURSE SOURCES_ atlas/*.cpp)
list(APPEND SOURCES ${SOURCES_})
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1)
file(GLOB_RECURSE SOURCES_ atlas/*.cpp)
list(APPEND SOURCES ${SOURCES_})
list(APPEND LIBMEGDNN_DEF -DMEGDNN_WITH_ATLAS=1)
endif()

add_definitions(${LIBMEGDNN_DEF})
@@ -174,81 +196,85 @@ add_library(megdnn EXCLUDE_FROM_ALL OBJECT ${SOURCES})

target_link_libraries(megdnn PUBLIC opr_param_defs)
if(MGE_WITH_CUDA)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>)
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR})
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cutlass>)
target_include_directories(megdnn PRIVATE ${CUDNN_INCLUDE_DIR})
endif()

if(MGE_WITH_ROCM)
target_include_directories(megdnn PUBLIC
${HIP_INCLUDE_DIR}
${MIOPEN_INCLUDE_DIR}
${ROCBLAS_INCLUDE_DIR}
${ROCRAND_INCLUDE_DIR}
${AMDOCL_INCLUDE_DIR})
target_link_directories(megdnn PUBLIC
${HIP_LIBRARY_DIR}
${MIOPEN_LIBRARY_DIR}
${ROCBLAS_LIBRARY_DIR}
${ROCRAND_LIBRARY_DIR}
${AMDOCL_LIBRARY_DIR})
target_include_directories(
megdnn PUBLIC ${HIP_INCLUDE_DIR} ${MIOPEN_INCLUDE_DIR} ${ROCBLAS_INCLUDE_DIR}
${ROCRAND_INCLUDE_DIR} ${AMDOCL_INCLUDE_DIR})
target_link_directories(
megdnn
PUBLIC
${HIP_LIBRARY_DIR}
${MIOPEN_LIBRARY_DIR}
${ROCBLAS_LIBRARY_DIR}
${ROCRAND_LIBRARY_DIR}
${AMDOCL_LIBRARY_DIR})
endif()

if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64")
if(MGE_ENABLE_CPUINFO)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>)
endif()
if(${MGE_ARCH} STREQUAL "x86_64"
OR ${MGE_ARCH} STREQUAL "i386"
OR ${MGE_ARCH} STREQUAL "armv7"
OR ${MGE_ARCH} STREQUAL "aarch64")
if(MGE_ENABLE_CPUINFO)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:cpuinfo>)
endif()
endif()

target_include_directories(megdnn
PUBLIC
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE
${PROJECT_SOURCE_DIR}/dnn
${PROJECT_SOURCE_DIR}/third_party/midout/src
)
target_include_directories(
megdnn
PUBLIC $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/genfiles>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/dnn/include>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${PROJECT_SOURCE_DIR}/dnn ${PROJECT_SOURCE_DIR}/third_party/midout/src)

install(DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include DESTINATION . FILES_MATCHING PATTERN "*.h*")
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/dnn/include
DESTINATION .
FILES_MATCHING
PATTERN "*.h*")

if(CXX_SUPPORT_WCLASS_MEMACCESS)
if(MGE_WITH_CUDA)
target_compile_options(megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>")
else()
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess")
endif()
if(MGE_WITH_CUDA)
target_compile_options(
megdnn PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-Wno-class-memaccess>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-class-memaccess>")
else()
target_compile_options(megdnn PRIVATE "-Wno-class-memaccess")
endif()
endif()
target_compile_definitions(megdnn INTERFACE ${LIBMEGDNN_DEF})

if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
if (BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>)
else()
target_link_libraries(megdnn PRIVATE dnnl)
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:dnnl>)
else()
target_link_libraries(megdnn PRIVATE dnnl)
endif()
endif()
if (BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>)
if(BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_BLAS_LIBS}>)
else()
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS})
target_link_libraries(megdnn PRIVATE ${MGE_BLAS_LIBS})
endif()

if (MGE_WITH_ROCM)
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS})
endif ()
if(MGE_WITH_ROCM)
target_link_libraries(megdnn PRIVATE ${HIPOBJS} ${MGE_ROCM_LIBS})
endif()

if(MGE_WITH_ATLAS)
if (BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>)
else()
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS})
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(megdnn PRIVATE $<BUILD_INTERFACE:${MGE_ATLAS_LIBS}>)
else()
target_link_libraries(megdnn PRIVATE ${MGE_ATLAS_LIBS})
endif()

endif()

if(CMAKE_THREAD_LIBS_INIT)
target_link_libraries(megdnn PRIVATE Threads::Threads)
target_link_libraries(megdnn PRIVATE Threads::Threads)
endif()

install(TARGETS megdnn EXPORT ${MGE_EXPORT_TARGETS})

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1(2);
INSTANTIATION_CONV_S1_BIAS(2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_NO_BIAS(2);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2(5);
INSTANTIATION_CONV_S2_BIAS(2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_NO_BIAS(2);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1(5);
INSTANTIATION_CONV_S1_BIAS(3);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(3);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_NO_BIAS(3);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_2x2s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2(2);
INSTANTIATION_CONV_S2_BIAS(3);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(3);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_NO_BIAS(3);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1(3);
INSTANTIATION_CONV_S1_BIAS(5);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(5);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_NO_BIAS(5);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2(7);
INSTANTIATION_CONV_S2_BIAS(5);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(5);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_5x5s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_NO_BIAS(5);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1(7);
INSTANTIATION_CONV_S1_BIAS(7);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(7);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h"
INSTANTIATION_CONV_S1_NO_BIAS(7);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_3x3s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2(3);
INSTANTIATION_CONV_S2_BIAS(7);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(7);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_7x7s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h"
INSTANTIATION_CONV_S2_NO_BIAS(7);
// vim: syntax=cpp.doxygen

+ 8
- 5
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s1.h View File

@@ -469,9 +469,12 @@ void conv_bias::conv_direct_fp32_nchw44(
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>)

#define INSTANTIATION_CONV_S1(filter_size) \
FOR_OP(filter_size, BiasMode::NO_BIAS) \
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \
FOR_OP(filter_size, BiasMode::BIAS)
#define INSTANTIATION_CONV_S1_NO_BIAS(filter_size) \
FOR_OP(filter_size, BiasMode::NO_BIAS)

// vim: syntax=cpp.doxygen
#define INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(filter_size) \
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS)

#define INSTANTIATION_CONV_S1_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS)

// vim: syntax=cpp.doxygen
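The old INSTANTIATION_CONV_S1 macro instantiated all three bias modes in one translation unit; the split above lets each new *_bias / *_broadcast_channel_bias / *_no_bias .cpp expand just one of them, spreading template-instantiation cost across smaller TUs. A toy reduction of the pattern (the kernel template is a stand-in for the real conv kernels):

#include <iostream>

enum class BiasMode { NO_BIAS, BROADCAST_CHANNEL_BIAS, BIAS };

template <int filter_size, BiasMode bias>
int conv_kernel() {  // stand-in for conv_direct_fp32_nchw44 and friends
    return filter_size * 10 + static_cast<int>(bias);
}

#define FOR_OP(filter_size, bias) template int conv_kernel<filter_size, bias>();

#define INSTANTIATION_CONV_S1_NO_BIAS(filter_size) \
    FOR_OP(filter_size, BiasMode::NO_BIAS)
#define INSTANTIATION_CONV_S1_BROADCAST_CHANNEL_BIAS(filter_size) \
    FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS)
#define INSTANTIATION_CONV_S1_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS)

// In the real tree this single line lives alone in the 2x2s1 _bias.cpp file:
INSTANTIATION_CONV_S1_BIAS(2);

int main() {
    std::cout << conv_kernel<2, BiasMode::BIAS>() << '\n';  // 22
}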

+ 8
- 5
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw44_kern_common_s2.h View File

@@ -550,9 +550,12 @@ void conv_bias::conv_direct_fp32_nchw44(
INSTANTIATION(filter_size, bias, HSwishOp<dt_float32>) \
INSTANTIATION(filter_size, bias, SigmoidOp<dt_float32>)

#define INSTANTIATION_CONV_S2(filter_size) \
FOR_OP(filter_size, BiasMode::NO_BIAS) \
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS) \
FOR_OP(filter_size, BiasMode::BIAS)
#define INSTANTIATION_CONV_S2_NO_BIAS(filter_size) \
FOR_OP(filter_size, BiasMode::NO_BIAS)

// vim: syntax=cpp.doxygen
#define INSTANTIATION_CONV_S2_BROADCAST_CHANNEL_BIAS(filter_size) \
FOR_OP(filter_size, BiasMode::BROADCAST_CHANNEL_BIAS)

#define INSTANTIATION_CONV_S2_BIAS(filter_size) FOR_OP(filter_size, BiasMode::BIAS)

// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV(2, 1);
INSTANCE_CONV_BIAS(2, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(2, 1);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV(2, 2);
INSTANCE_CONV_BIAS(2, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(2, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_2x2s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(2, 2);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV(3, 1);
INSTANCE_CONV_BIAS(3, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(3, 1);
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2.cpp → dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2.cpp
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -11,4 +11,5 @@
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV(3, 2);
INSTANCE_CONV_BIAS(3, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(3, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_3x3s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(3, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BIAS(5, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(5, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BIAS(5, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(5, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_5x5s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(5, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BIAS(7, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s1_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(7, 1);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BIAS(7, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_broadcast_channel_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(7, 2);
// vim: syntax=cpp.doxygen

+ 15
- 0
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp View File

@@ -0,0 +1,15 @@
/**
* \file
* dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_7x7s2_no_bias.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h"
INSTANCE_CONV_NO_BIAS(7, 2);
// vim: syntax=cpp.doxygen

+ 6
- 4
dnn/src/arm_common/conv_bias/fp32/direct_kernels/f32_direct_nchw_nchw44_kern_common.h View File

@@ -928,9 +928,11 @@ void fp32_direct_nchw_nchw44::conv_direct_fp32_nchw_nchw44(
INSTANTIATION(stride, filter, bias, ReluOp<dt_float32>) \
INSTANTIATION(stride, filter, bias, HSwishOp<dt_float32>)

#define INSTANCE_CONV(filter, stride) \
FOR_OP(stride, filter, BiasMode::NO_BIAS) \
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS) \
FOR_OP(stride, filter, BiasMode::BIAS)
#define INSTANCE_CONV_NO_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::NO_BIAS)

#define INSTANCE_CONV_BROADCAST_CHANNEL_BIAS(filter, stride) \
FOR_OP(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS)

#define INSTANCE_CONV_BIAS(filter, stride) FOR_OP(stride, filter, BiasMode::BIAS)

// vim: syntax=cpp.doxygen
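
The old INSTANCE_CONV(filter, stride) macro instantiated all three bias modes from one translation unit; splitting it into INSTANCE_CONV_NO_BIAS, INSTANCE_CONV_BROADCAST_CHANNEL_BIAS and INSTANCE_CONV_BIAS lets each of the new *_no_bias.cpp, *_broadcast_channel_bias.cpp and *_bias.cpp files above emit only its own subset of explicit instantiations, keeping every TU small and letting them build in parallel. A minimal, self-contained sketch of the pattern (kern_common.h and conv_kern below are illustrative names, not MegEngine's):

// kern_common.h -- shared template definition plus one instantiation macro per bias mode
#pragma once
enum class BiasMode { NO_BIAS, BROADCAST_CHANNEL_BIAS, BIAS };

template <int filter, int stride, BiasMode bias>
void conv_kern(const float* /*src*/, const float* /*flt*/, float* /*dst*/) {
    // kernel body shared by every instantiation
}

#define INSTANCE_CONV_BIAS(filter, stride)                    \
    template void conv_kern<filter, stride, BiasMode::BIAS>( \
            const float*, const float*, float*)

// f32_direct_7x7s1_bias.cpp -- one small TU per (filter, stride, bias-mode) variant
#include "kern_common.h"
INSTANCE_CONV_BIAS(7, 1);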

dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.cpp → dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.cpp
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -265,7 +265,8 @@ void conv_direct_sdot_int8_nchw44(

#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \
template void \
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \
dst_type, stride, bias_mode, Op, filter_size>( \
dst_type * dst, const int oh, const int ow, const int8_t* src, \
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \
const int oh_size, const int oc, const int ic, const Op& op);
@@ -284,22 +285,6 @@ void conv_direct_sdot_int8_nchw44(
FOR_OP(stride, i, BiasMode::NO_BIAS) \
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS)

#define FOR_FILTER(stride) \
FOR_BIAS(stride, 2) \
FOR_BIAS(stride, 3) \
FOR_BIAS(stride, 5) \
FOR_BIAS(stride, 7)

FOR_FILTER(1)

#undef FOR_STRIDE
#undef FOR_FILTER
#undef FOR_IC
#undef FOR_BIAS
#undef FOR_NONLINEAR
#undef FOR_REMAIN
#undef INSTANTIATION

} // namespace direct_dotprod_nchw44
} // namespace arm_common
} // namespace megdnn
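
With the kernel body now living in dot_direct_nchw44_s1.h, FOR_BIAS(stride, filter) is expanded by the per-filter-size .cpp files below at global namespace scope, so the explicit instantiation inside INSTANTIATION has to spell the fully qualified name megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44: an explicit instantiation written outside the template's own namespace must use a qualified-id. The same change is applied to dot_direct_nchw44_s2.h further down. A self-contained illustration of that rule (lib.h and dot are made-up names):

// lib.h -- a function template defined inside a nested namespace
#pragma once
namespace lib {
namespace detail {
template <int k>
int dot(const int* a, const int* b) {
    int s = 0;
    for (int i = 0; i < k; ++i)
        s += a[i] * b[i];
    return s;
}
}  // namespace detail
}  // namespace lib

// dot_3.cpp -- instantiating from global scope requires the qualified name;
// an unqualified `template int dot<3>(const int*, const int*);` would be
// ill-formed here even after `using namespace lib::detail;`.
#include "lib.h"
template int lib::detail::dot<3>(const int*, const int*);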

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_2x2.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(1, 2);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_3x3.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(1, 3);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_5x5.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(1, 5);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1_7x7.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s1.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(1, 7);

#endif
// vim: syntax=cpp.doxygen

dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.cpp → dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.cpp
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -266,7 +266,8 @@ void conv_direct_sdot_int8_nchw44(

#define INSTANTIATION(dst_type, stride, filter_size, bias_mode, Op) \
template void \
conv_direct_sdot_int8_nchw44<dst_type, stride, bias_mode, Op, filter_size>( \
megdnn::arm_common::direct_dotprod_nchw44::conv_direct_sdot_int8_nchw44< \
dst_type, stride, bias_mode, Op, filter_size>( \
dst_type * dst, const int oh, const int ow, const int8_t* src, \
const int ih, const int iw, const int8_t* weight, const int32_t* bias, \
const int oh_size, const int oc, const int ic, const Op& op);
@@ -285,22 +286,6 @@ void conv_direct_sdot_int8_nchw44(
FOR_OP(stride, i, BiasMode::NO_BIAS) \
FOR_OP(stride, i, BiasMode::BROADCAST_CHANNEL_BIAS)

#define FOR_FILTER(stride) \
FOR_BIAS(stride, 2) \
FOR_BIAS(stride, 3) \
FOR_BIAS(stride, 5) \
FOR_BIAS(stride, 7)

FOR_FILTER(2)

#undef FOR_STRIDE
#undef FOR_FILTER
#undef FOR_IC
#undef FOR_BIAS
#undef FOR_NONLINEAR
#undef FOR_REMAIN
#undef INSTANTIATION

} // namespace direct_dotprod_nchw44
} // namespace arm_common
} // namespace megdnn

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_2x2.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(2, 2);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_3x3.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(2, 3);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_5x5.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(2, 5);

#endif
// vim: syntax=cpp.doxygen

+ 21
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp View File

@@ -0,0 +1,21 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2_7x7.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/dot_direct_nchw44_s2.h"
#if MGB_ENABLE_DOT
using namespace megdnn;
using namespace arm_common;

FOR_BIAS(2, 7);

#endif
// vim: syntax=cpp.doxygen

+ 2
- 2
dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h View File

@@ -1,6 +1,6 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
@@ -45,4 +45,4 @@ public:
} // namespace arm_common
} // namespace megdnn

// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen

+ 5
- 445
dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.cpp View File

@@ -13,336 +13,9 @@

#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h"
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h"

namespace megdnn {
namespace arm_common {
namespace {
/**
* @brief core code for calculation pattern
*
* @tparam src_idx is offset of src reg
* @tparam weight_idx is offset of weight reg
* @tparam c_dim is output channel
* @tparam Func mla operation function
* @tparam stride
* @tparam T output regs type
* @tparam T2 src regs type
* @tparam T3 weight regs type
* @tparam T4 temp regs type
*/

template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3, typename T4>
struct ShiftCalHelper {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp);
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight);
};
template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3, typename T4>
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) {
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl(
c, src, weight, temp);
}
template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3>
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) {
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl(
c, src, weight);
};
template <
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4>
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) {
c[0][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]);
c[1][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]);
c[0][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]);
c[1][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]);
c[0][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]);
c[1][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]);
c[0][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]);
c[1][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]);

c[0][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]);
c[1][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]);
c[0][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]);
c[1][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]);
c[0][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]);
c[1][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]);
c[0][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]);
c[1][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]);
}
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&);
};
template <
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4>
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) {
c[0][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]);
c[0][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]);
c[0][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]);
c[0][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]);
c[0][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]);
c[0][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]);
c[0][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]);
c[0][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]);
}
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&);
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 1;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}

store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 2;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}

store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 3;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);

load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc);

load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 5;
constexpr int filter_width = 8;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 2;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
#define cb(step) \
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c);
UNROLL_CALL_RAW(5, cb);
#undef cb
weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 7;
constexpr int filter_width = 8;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 2;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
#define cb(step) \
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c);

UNROLL_CALL_RAW(7, cb);
#undef cb
weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};
} // namespace

namespace int8_direct_nchw_nchw44 {
/**
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh, fw/4, 4(oc)*4(fw)}
@@ -444,115 +117,9 @@ void pack_nchw_src_for_nchw44_conv<1>(
}
}

template <BiasMode bias_mode, typename Op, size_t filter_size>
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> {
static void impl(
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp,
int8_t* dst, const size_t oc, const size_t ic, const size_t ih,
const size_t iw, const size_t oh, const size_t ow, const Op& op) {
MEGDNN_MARK_USED_VAR(temp);
constexpr int stride = 1;
constexpr size_t fh = filter_size;
constexpr size_t fw = (filter_size + 3) / 4 * 4;
constexpr size_t ic_step = 1;
constexpr size_t big_oc_step = 8;
constexpr size_t oc_step = 4;
constexpr size_t ih_step = 1;
constexpr size_t oh_step = 1;
constexpr size_t ow_step = 8;
constexpr size_t stride_h = stride;
constexpr size_t stride_w = stride;
constexpr int pack_iw_len = 16;

const size_t img_stride = oh * ow;
const size_t ow_end = ow / ow_step * ow_step;
const size_t ow_remain = ow - ow_end;
const size_t oc_end = oc / big_oc_step * big_oc_step;
const size_t oc_remain = oc - oc_end;
const int ld_dst_oc = oc_step * img_stride;

using remain_fun = std::function<void(
const int8_t* src_ptr, const int8_t* weight_ptr,
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw,
int ld_dst_oc, const Op& op)>;
remain_fun kern_big_oc_remain = nullptr;
remain_fun kern_small_oc_remain = nullptr;
switch (ow_remain) {
#define cb(step) \
case step: \
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \
break;

UNROLL_CALL_RAW(8, cb);
default:
megdnn_assert(0, "no remain %zu for kern", ow_remain);
}

for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) {
const size_t weight_offset = oc_idx * ic * fh * fw;
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) {
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step;

KerNeonXXs2NchwNchw44<
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>::
impl(src + src_offset, filter + weight_offset,
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc,
op);
}
if (ow_remain > 0) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step;
kern_big_oc_remain(
src + src_offset, filter + weight_offset, bias + oc_idx,
dst + dst_offset, ic, ih, iw, ld_dst_oc, op);
}
}
}

if (oc_remain > 0) {
size_t oc_idx = oc_end;
const size_t weight_offset = oc_idx * ic * fh * fw;
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) {
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step;
KerNeonXXs2NchwNchw44<
bias_mode, Op, ow_step, filter_size, oc_step, stride>::
impl(src + src_offset, filter + weight_offset,
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc,
op);
}
if (ow_remain > 0) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step;
kern_small_oc_remain(
src + src_offset, filter + weight_offset, bias + oc_idx,
dst + dst_offset, ic, ih, iw, ld_dst_oc, op);
}
}
}
}
};

#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \
template struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>;
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>;

#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \
INSTANCE_CONV_KERN_FUN( \
@@ -566,17 +133,10 @@ struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> {
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS)

#define INSTANCE_CONV_KERN(stride) \
INSTANCE_BIAS_MODE_PARAM(stride, 1) \
INSTANCE_BIAS_MODE_PARAM(stride, 2) \
INSTANCE_BIAS_MODE_PARAM(stride, 3) \
INSTANCE_BIAS_MODE_PARAM(stride, 5) \
INSTANCE_BIAS_MODE_PARAM(stride, 7)

INSTANCE_CONV_KERN(1);
#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter)

} // namespace int8_direct_nchw_nchw44
} // namespace arm_common
} // namespace megdnn

// vim: syntax=cpp.doxygen
// vim: syntax=cpp.doxygen
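
The heavy lifting (the ShiftCalHelper specializations, the KerNeonXXs2NchwNchw44 kernels and ConvDiectStrideInt8NchwNchw44 itself) now lives in the new int8_direct_nchw_nchw44_s1.h below, and INSTANCE_CONV_KERN takes a (stride, filter) pair so each per-filter .cpp emits only its own instantiations. One detail worth noting from the moved implementation: the packed filter width is rounded up to a multiple of 4 (`fw = (filter_size + 3) / 4 * 4`), which is why the 1x1/2x2/3x3 kernels use filter_width = 4 while the 5x5/7x7 kernels use filter_width = 8. A small self-contained check of that arithmetic (round_up4 is an illustrative helper, not a MegEngine function):

#include <cstdio>

// Rounds the filter width up to a multiple of 4, mirroring
// `fw = (filter_size + 3) / 4 * 4` in ConvDiectStrideInt8NchwNchw44.
constexpr int round_up4(int fw) { return (fw + 3) / 4 * 4; }

static_assert(round_up4(1) == 4 && round_up4(3) == 4, "1x1..3x3 pad to width 4");
static_assert(round_up4(5) == 8 && round_up4(7) == 8, "5x5/7x7 pad to width 8");

int main() {
    // Packed filter elements per group of 4 output channels, matching
    // ld_weight_oc = oc_step * filter_height * filter_width * ic in the kernels.
    const int ic = 16, filter_size = 7;
    const int fh = filter_size, fw = round_up4(filter_size);
    std::printf("%d\n", 4 * fh * fw * ic);  // 4 * 7 * 8 * 16 = 3584
}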

+ 481
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h View File

@@ -0,0 +1,481 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_common.h"
#include "src/arm_common/conv_bias/int8/direct_nchw_nchw44_kern.h"

namespace megdnn {
namespace arm_common {
namespace {
/**
* @brief core code for calculation pattern
*
* @tparam src_idx is offset of src reg
* @tparam weight_idx is offset of weight reg
* @tparam c_dim is output channel
* @tparam Func mla operation function
* @tparam stride
* @tparam T output regs type
* @tparam T2 src regs type
* @tparam T3 weight regs type
* @tparam T4 temp regs type
*/

template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3, typename T4>
struct ShiftCalHelper {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp);
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight);
};
template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3, typename T4>
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight, T4& temp) {
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, T4>::impl(
c, src, weight, temp);
}
template <
int src_idx, int weight_idx, int c_dim, int stride, typename T, typename T2,
typename T3>
MEGDNN_ALWAYS_INLINE void cal_helper(T& c, T2& src, T3& weight) {
ShiftCalHelper<src_idx, weight_idx, c_dim, stride, T, T2, T3, int>::impl(
c, src, weight);
};
template <
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4>
struct ShiftCalHelper<src_idx, weight_idx, 2, 1, T, T2, T3, T4> {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) {
c[0][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]);
c[1][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[1][weight_idx], c[1][0], temp[1]);
c[0][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[2]);
c[1][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[1][weight_idx], c[1][1], temp[3]);
c[0][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[0]);
c[1][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[1][weight_idx], c[1][2], temp[1]);
c[0][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[2]);
c[1][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[1][weight_idx], c[1][3], temp[3]);

c[0][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]);
c[1][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[1][weight_idx], c[1][4], temp[1]);
c[0][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[2]);
c[1][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[1][weight_idx], c[1][5], temp[3]);
c[0][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[0]);
c[1][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[1][weight_idx], c[1][6], temp[1]);
c[0][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[2]);
c[1][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[1][weight_idx], c[1][7], temp[3]);
}
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&);
};
template <
int src_idx, int weight_idx, typename T, typename T2, typename T3, typename T4>
struct ShiftCalHelper<src_idx, weight_idx, 1, 1, T, T2, T3, T4> {
static MEGDNN_ALWAYS_INLINE void impl(T& c, T2& src, T3& weight, T4& temp) {
c[0][0] = vdotq_s32_h(
src[(0 + src_idx) % 8], weight[0][weight_idx], c[0][0], temp[0]);
c[0][1] = vdotq_s32_h(
src[(1 + src_idx) % 8], weight[0][weight_idx], c[0][1], temp[1]);
c[0][2] = vdotq_s32_h(
src[(2 + src_idx) % 8], weight[0][weight_idx], c[0][2], temp[2]);
c[0][3] = vdotq_s32_h(
src[(3 + src_idx) % 8], weight[0][weight_idx], c[0][3], temp[3]);
c[0][4] = vdotq_s32_h(
src[(4 + src_idx) % 8], weight[0][weight_idx], c[0][4], temp[0]);
c[0][5] = vdotq_s32_h(
src[(5 + src_idx) % 8], weight[0][weight_idx], c[0][5], temp[1]);
c[0][6] = vdotq_s32_h(
src[(6 + src_idx) % 8], weight[0][weight_idx], c[0][6], temp[2]);
c[0][7] = vdotq_s32_h(
src[(7 + src_idx) % 8], weight[0][weight_idx], c[0][7], temp[3]);
}
static MEGDNN_ALWAYS_INLINE void impl(T&, T2&, T3&);
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 1, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 1;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}

store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 2, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 2;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}

store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 3, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 3;
constexpr int filter_width = 4;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 1;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr, ld_weight_oc);

load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 0 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 1 * filter_width * oc_step, ld_weight_oc);

load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 1 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>(
dot4_weight, weight_ptr + 2 * filter_width * oc_step, ld_weight_oc);
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>(
src, nchw_src_ptr + 2 * iw * pack_iw_len, 0);
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c);

weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 5, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 5;
constexpr int filter_width = 8;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 2;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
#define cb(step) \
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c);
UNROLL_CALL_RAW(5, cb);
#undef cb
weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};

template <BiasMode bias_mode, typename Op, int remain_w, int oc_block>
struct KerNeonXXs2NchwNchw44<bias_mode, Op, remain_w, 7, oc_block, 1> {
static void impl(
const int8_t* src_ptr, const int8_t* weight_ptr, const int32_t* bias_ptr,
int8_t* dst_ptr, int ic, int ih, int iw, int ld_dst_oc, const Op& op) {
constexpr int stride = 1;
constexpr int filter_height = 7;
constexpr int filter_width = 8;
constexpr int oc_step = 4;
constexpr int loop_ic_step = 1;
constexpr int simd_len = 16;
constexpr int pack_iw_len = 16;
constexpr int src_reg = 8;
constexpr int weight_reg = 2;

const int ic_stride = ih * iw * pack_iw_len;
const int ld_weight_oc = oc_step * filter_height * filter_width * ic;
constexpr int c_dim = OCHelper<oc_block>::val;
int32x4_t c[c_dim][8];
init_ocx_ow8<c_dim, bias_mode, remain_w>(c, bias_ptr, oc_step);

for (int ic_idx = 0; ic_idx < ic; ic_idx += loop_ic_step) {
const int8_t* nchw_src_ptr = src_ptr + ic_idx * ic_stride;
int8x16_t src[src_reg];
int8x16_t dot4_weight[c_dim][weight_reg];
int16x8_t temp_c[4];
#define cb(step) \
load_helper<weight_reg, 0, simd_len, c_dim, Vld1q_s8>( \
dot4_weight, weight_ptr + step * filter_width * oc_step, ld_weight_oc); \
load_helper<src_reg, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len, 0); \
cal_helper<0, 0, c_dim, stride>(c, src, dot4_weight, temp_c); \
load_helper<4, 0, simd_len, 0, Vld1q_s8>( \
src, nchw_src_ptr + step * iw * pack_iw_len + src_reg * pack_iw_len, 0); \
cal_helper<4, 1, c_dim, stride>(c, src, dot4_weight, temp_c);

UNROLL_CALL_RAW(7, cb);
#undef cb
weight_ptr += oc_step * filter_height * filter_width;
}
store_ocx_ow8_remain_static_dt<c_dim, remain_w, Op, dt_qint8*>(
c, op, dst_ptr, ld_dst_oc);
}
};
} // namespace

namespace int8_direct_nchw_nchw44 {
/**
* pack {oc / 4, fh, fw, ic, 4(oc)} to {oc / 4, ic, fh, fw/4, 4(oc)*4(fw)}
* packing interleaves two adjacent filter rows into one row
* */
template <BiasMode bias_mode, typename Op, size_t filter_size>
struct ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, 1> {
static void impl(
const int8_t* src, const int8_t* filter, const int32_t* bias, int32_t* temp,
int8_t* dst, const size_t oc, const size_t ic, const size_t ih,
const size_t iw, const size_t oh, const size_t ow, const Op& op) {
MEGDNN_MARK_USED_VAR(temp);
constexpr int stride = 1;
constexpr size_t fh = filter_size;
constexpr size_t fw = (filter_size + 3) / 4 * 4;
constexpr size_t ic_step = 1;
constexpr size_t big_oc_step = 8;
constexpr size_t oc_step = 4;
constexpr size_t ih_step = 1;
constexpr size_t oh_step = 1;
constexpr size_t ow_step = 8;
constexpr size_t stride_h = stride;
constexpr size_t stride_w = stride;
constexpr int pack_iw_len = 16;

const size_t img_stride = oh * ow;
const size_t ow_end = ow / ow_step * ow_step;
const size_t ow_remain = ow - ow_end;
const size_t oc_end = oc / big_oc_step * big_oc_step;
const size_t oc_remain = oc - oc_end;
const int ld_dst_oc = oc_step * img_stride;

using remain_fun = std::function<void(
const int8_t* src_ptr, const int8_t* weight_ptr,
const int32_t* bias_ptr, int8_t* dst_ptr, int ic, int ih, int iw,
int ld_dst_oc, const Op& op)>;
remain_fun kern_big_oc_remain = nullptr;
remain_fun kern_small_oc_remain = nullptr;
switch (ow_remain) {
#define cb(step) \
case step: \
kern_big_oc_remain = KerNeonXXs2NchwNchw44< \
bias_mode, Op, step, filter_size, big_oc_step, stride>::impl; \
kern_small_oc_remain = KerNeonXXs2NchwNchw44< \
bias_mode, Op, step, filter_size, oc_step, stride>::impl; \
break;

UNROLL_CALL_RAW(8, cb);
default:
megdnn_assert(0, "no remain %zu for kern", ow_remain);
}

for (size_t oc_idx = 0; oc_idx < oc_end; oc_idx += big_oc_step) {
const size_t weight_offset = oc_idx * ic * fh * fw;
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) {
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step;

KerNeonXXs2NchwNchw44<
bias_mode, Op, ow_step, filter_size, big_oc_step, stride>::
impl(src + src_offset, filter + weight_offset,
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc,
op);
}
if (ow_remain > 0) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step;
kern_big_oc_remain(
src + src_offset, filter + weight_offset, bias + oc_idx,
dst + dst_offset, ic, ih, iw, ld_dst_oc, op);
}
}
}

if (oc_remain > 0) {
size_t oc_idx = oc_end;
const size_t weight_offset = oc_idx * ic * fh * fw;
for (size_t oh_idx = 0; oh_idx < oh; oh_idx += oh_step) {
for (size_t ow_idx = 0; ow_idx < ow_end; ow_idx += ow_step) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_idx * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_idx) * oc_step;
KerNeonXXs2NchwNchw44<
bias_mode, Op, ow_step, filter_size, oc_step, stride>::
impl(src + src_offset, filter + weight_offset,
bias + oc_idx, dst + dst_offset, ic, ih, iw, ld_dst_oc,
op);
}
if (ow_remain > 0) {
const size_t src_offset =
(oh_idx * stride_h * iw + ow_end * stride_w * ih_step) *
ic_step * pack_iw_len;
const size_t dst_offset =
oc_idx * img_stride + (oh_idx * ow + ow_end) * oc_step;
kern_small_oc_remain(
src + src_offset, filter + weight_offset, bias + oc_idx,
dst + dst_offset, ic, ih, iw, ld_dst_oc, op);
}
}
}
}
};

#define INSTANCE_CONV_KERN_FUN(stride, filter_size, bias_mode, Op) \
template struct megdnn::arm_common::int8_direct_nchw_nchw44:: \
ConvDiectStrideInt8NchwNchw44<bias_mode, Op, filter_size, stride>;

#define INSTANCE_OP_PARAM(stride, filter, bias_mode) \
INSTANCE_CONV_KERN_FUN( \
stride, filter, bias_mode, TypeCvtOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \
INSTANCE_CONV_KERN_FUN( \
stride, filter, bias_mode, ReluOp<dt_qint32 MEGDNN_COMMA dt_qint8>) \
INSTANCE_CONV_KERN_FUN( \
stride, filter, bias_mode, HSwishOp<dt_qint32 MEGDNN_COMMA dt_qint8>)

#define INSTANCE_BIAS_MODE_PARAM(stride, filter) \
INSTANCE_OP_PARAM(stride, filter, BiasMode::NO_BIAS) \
INSTANCE_OP_PARAM(stride, filter, BiasMode::BROADCAST_CHANNEL_BIAS)

#define INSTANCE_CONV_KERN(stride, filter) INSTANCE_BIAS_MODE_PARAM(stride, filter)

} // namespace int8_direct_nchw_nchw44
} // namespace arm_common
} // namespace megdnn

// vim: syntax=cpp.doxygen
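
The ShiftCalHelper/cal_helper pair above is the usual work-around for the fact that C++ function templates cannot be partially specialized: the actual computation lives in static impl() members of a class template that is partially specialized on stride (and the accumulator layout), while a thin forwarding function template keeps the call sites readable. A stripped-down sketch of the idiom (the names and the trivial scalar body are illustrative, not the NEON code above):

// Class template carries the (partial) specializations.
template <int src_idx, int c_dim, int stride>
struct ShiftCalHelper {
    static void impl(int& acc, const int* src, int weight);
};

// Partial specialization: the stride == 1 variant; src_idx and c_dim stay generic.
template <int src_idx, int c_dim>
struct ShiftCalHelper<src_idx, c_dim, 1> {
    static void impl(int& acc, const int* src, int weight) {
        acc += src[src_idx] * weight;  // scalar stand-in for the vdotq_s32_h accumulation
    }
};

// Forwarding function template: function templates cannot be partially
// specialized, hence the struct + static impl indirection above.
template <int src_idx, int c_dim, int stride>
inline void cal_helper(int& acc, const int* src, int weight) {
    ShiftCalHelper<src_idx, c_dim, stride>::impl(acc, src, weight);
}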

+ 19
- 0
dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp View File

@@ -0,0 +1,19 @@
/**
* \file
* dnn/src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1_1x1.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "src/arm_common/conv_bias/int8/direct_kernels/int8_direct_nchw_nchw44_s1.h"
using namespace megdnn;
using namespace arm_common;

INSTANCE_CONV_KERN(1, 1);

// vim: syntax=cpp.doxygen
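
Following the macro chain defined in int8_direct_nchw_nchw44_s1.h (INSTANCE_CONV_KERN -> INSTANCE_BIAS_MODE_PARAM -> INSTANCE_OP_PARAM -> INSTANCE_CONV_KERN_FUN), the single INSTANCE_CONV_KERN(1, 1) line in this 1x1 translation unit expands, in effect, to six explicit instantiations, one per (bias mode, activation Op) combination, in place of the old INSTANCE_CONV_KERN(1) that covered filter sizes 1/2/3/5/7 in one TU:

template struct megdnn::arm_common::int8_direct_nchw_nchw44::
        ConvDiectStrideInt8NchwNchw44<BiasMode::NO_BIAS, TypeCvtOp<dt_qint32, dt_qint8>, 1, 1>;
// ...plus the same instantiation for ReluOp and HSwishOp, and all three Ops again
// with BiasMode::BROADCAST_CHANNEL_BIAS (MEGDNN_COMMA stands in for the comma
// between dt_qint32 and dt_qint8 inside the macro arguments).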

Some files were not shown because too many files changed in this diff
