
Revert "Pull Request !187 : sync src code and update the source folder location"

pull/194/head
lujiale (Gitee), 4 years ago
commit e0032f656f
100 changed files with 2242 additions and 11274 deletions
  1. +0 -8 .gitmodules
  2. +119 -117 CMakeLists.txt
  3. +24 -100 build.sh
  4. +0 -23 cmake/FindModule.cmake
  5. +22 -0 cmake/external_libs/eigen.cmake
  6. +0 -39 cmake/external_libs/gflags.cmake
  7. +24 -0 cmake/external_libs/gtest.cmake
  8. +18 -22 cmake/external_libs/json.cmake
  9. +13 -29 cmake/external_libs/onnx.cmake
  10. +63 -0 cmake/external_libs/protobuf.cmake
  11. +0 -59 cmake/external_libs/protobuf_shared.cmake
  12. +0 -43 cmake/external_libs/protobuf_static.cmake
  13. +0 -103 cmake/external_libs/protoc.cmake
  14. +11 -62 cmake/external_libs/securec.cmake
  15. +371 -0 cmake/ge_utils.cmake
  16. +0 -52 cmake/intf_pub_android.cmake
  17. +0 -33 cmake/intf_pub_linux.cmake
  18. +0 -24 cmake/intf_pub_windows.cmake
  19. +0 -910 ge/CMakeLists.txt
  20. +0 -0 ge/README.md
  21. +0 -171 ge/common/CMakeLists.txt
  22. +0 -206 ge/common/proto/ge_ir.proto
  23. +0 -152 ge/common/proto/insert_op.proto
  24. +0 -401 ge/common/proto/om.proto
  25. +0 -170 ge/common/proto/task.proto
  26. +0 -62 ge/common/proto/tensorflow/attr_value.proto
  27. +0 -100 ge/common/proto/tensorflow/function.proto
  28. +0 -56 ge/common/proto/tensorflow/graph.proto
  29. +0 -14 ge/common/proto/tensorflow/graph_library.proto
  30. +0 -63 ge/common/proto/tensorflow/node_def.proto
  31. +0 -164 ge/common/proto/tensorflow/op_def.proto
  32. +0 -29 ge/common/proto/tensorflow/resource_handle.proto
  33. +0 -94 ge/common/proto/tensorflow/tensor.proto
  34. +0 -45 ge/common/proto/tensorflow/tensor_shape.proto
  35. +0 -74 ge/common/proto/tensorflow/types.proto
  36. +0 -31 ge/common/proto/tensorflow/versions.proto
  37. +0 -115 ge/executor/CMakeLists.txt
  38. +0 -206 ge/executor/proto/ge_ir.proto
  39. +0 -152 ge/executor/proto/insert_op.proto
  40. +0 -401 ge/executor/proto/om.proto
  41. +0 -89 ge/executor/proto/op_mapping_info.proto
  42. +0 -170 ge/executor/proto/task.proto
  43. +0 -225 ge/ge_local_engine/CMakeLists.txt
  44. +0 -170 ge/ge_local_engine/proto/task.proto
  45. +0 -65 ge/ge_runtime/CMakeLists.txt
  46. +0 -66 ge/ge_runtime/module.mk
  47. +0 -38 ge/graph/build/memory/CMakeLists.txt
  48. +0 -214 ge/host_cpu_engine/CMakeLists.txt
  49. +0 -1 ge/host_cpu_engine/proto/task.proto
  50. +0 -81 ge/offline/CMakeLists.txt
  51. +0 -1334 ge/offline/main.cc
  52. +0 -52 ge/offline/module.mk
  53. +0 -1 ge/offline/proto/ge_ir.proto
  54. +0 -1 ge/offline/proto/insert_op.proto
  55. +0 -1 ge/offline/proto/om.proto
  56. +0 -1 ge/offline/proto/task.proto
  57. +0 -503 ge/offline/single_op_parser.cc
  58. +0 -78 ge/offline/single_op_parser.h
  59. +0 -49 ge/plugin/engine/CMakeLists.txt
  60. +0 -1821 ge/proto/caffe/caffe.proto
  61. +0 -127 ge/proto/dump_task.proto
  62. +0 -104 ge/proto/ge_api.proto
  63. +0 -206 ge/proto/ge_ir.proto
  64. +0 -152 ge/proto/insert_op.proto
  65. +0 -401 ge/proto/om.proto
  66. +0 -89 ge/proto/op_mapping_info.proto
  67. +0 -170 ge/proto/task.proto
  68. +0 -62 ge/proto/tensorflow/attr_value.proto
  69. +0 -100 ge/proto/tensorflow/function.proto
  70. +0 -56 ge/proto/tensorflow/graph.proto
  71. +0 -14 ge/proto/tensorflow/graph_library.proto
  72. +0 -63 ge/proto/tensorflow/node_def.proto
  73. +0 -164 ge/proto/tensorflow/op_def.proto
  74. +0 -29 ge/proto/tensorflow/resource_handle.proto
  75. +0 -94 ge/proto/tensorflow/tensor.proto
  76. +0 -45 ge/proto/tensorflow/tensor_shape.proto
  77. +0 -74 ge/proto/tensorflow/types.proto
  78. +0 -31 ge/proto/tensorflow/versions.proto
  79. +0 -3 ge/session/readme.txt
  80. +141 -0 inc/common/blocking_queue.h
  81. +104 -0 inc/common/dynamic_aipp.h
  82. +94 -0 inc/common/npu_error_define.h
  83. +74 -0 inc/common/opskernel/ge_task_info.h
  84. +88 -0 inc/common/opskernel/ops_kernel_info_store.h
  85. +66 -0 inc/common/opskernel/ops_kernel_info_types.h
  86. +71 -0 inc/common/optimizer/graph_optimizer.h
  87. +34 -0 inc/common/optimizer/graph_optimizer_types.h
  88. +41 -0 inc/common/util/ai_core/common/aicore_util_attr_define.h
  89. +118 -0 inc/common/util/ai_core/common/aicore_util_types.h
  90. +107 -0 inc/common/util/ai_core/common/graph_comm.h
  91. +43 -0 inc/common/util/ai_core/common/scope_allocator.h
  92. +33 -0 inc/common/util/ai_core/param_calculate/aicore_param_calculator.h
  93. +45 -0 inc/common/util/ai_core/param_calculate/tensorsize_calculator.h
  94. +37 -0 inc/common/util/compress/compress.h
  95. +33 -0 inc/common/util/compress/compress_weight.h
  96. +94 -0 inc/common/util/error_manager/error_manager.h
  97. +101 -0 inc/common/util/platform_info.h
  98. +140 -0 inc/common/util/platform_info_def.h
  99. +75 -0 inc/external/graph/attr_value.h
  100. +38 -0 inc/external/graph/ge_error_codes.h

.gitmodules (+0 -8)

@@ -1,8 +0,0 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
branch = master
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
branch = master

CMakeLists.txt (+119 -117)

@@ -1,133 +1,135 @@
# Copyright 2019-2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

cmake_minimum_required(VERSION 3.14)
project (GraphEngine[CXX])
set(CMAKE_CXX_STANDARD 17)
add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)

set(GE_CODE_DIR ${CMAKE_CURRENT_LIST_DIR})
set(CMAKE_SKIP_INSTALL_ALL_DEPENDENCY TRUE)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
set(GE_SOURCE_DIR ${CMAKE_CURRENT_LIST_DIR})
set(GE_PROTO_DIR ${GE_SOURCE_DIR}/src)

if (NOT BUILD_PATH)
set(BUILD_PATH "${CMAKE_SOURCE_DIR}/build")
endif()
# architecture: aarch64 or x86_64
message(STATUS "System architecture: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
# system: euleros or ubuntu
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
execute_process(
COMMAND bash "-c" "cat /etc/os-release | grep ^ID= | awk -F '=' '{print $2}'"
OUTPUT_VARIABLE SYSTEM_TYPE
)
MESSAGE(STATUS "System type: ${SYSTEM_TYPE}.")
endif()

option(ENABLE_OPEN_SRC "Enable graphengine compile in opensource." FALSE)

if (ENABLE_OPEN_SRC)
set(HI_PYTHON python3.7)

include(cmake/external_libs/protobuf_shared.cmake)
include(cmake/external_libs/protobuf_static.cmake)
include(cmake/external_libs/protoc.cmake)
include(cmake/external_libs/gflags.cmake)
include(cmake/external_libs/securec.cmake)
include(cmake/external_libs/json.cmake)
include(cmake/FindModule.cmake)
include(cmake/intf_pub_linux.cmake)
# download json headers, rather than whole repository
include(${GE_SOURCE_DIR}/cmake/ge_utils.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/json.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/eigen.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/gtest.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/protobuf.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/onnx.cmake)
include(${GE_SOURCE_DIR}/cmake/external_libs/securec.cmake)
set(CMAKE_SKIP_RPATH TRUE)

# for CPU/GPU mode, find c_sec and slog from local prebuild
#if(NOT ENABLE_D AND NOT GE_ONLY)
# set(GE_PREBUILD_PATH ${GE_CODE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
# find_module(slog libslog.so ${GE_PREBUILD_PATH})
# if D_LINK_PATH is set in environment variables, search libraries in given path
if(DEFINED ENV{D_LINK_PATH})
# D_LINK_PATH is set
set(GE_LIB_PATH $ENV{D_LINK_PATH})
set(GE_SYS_ARCH "")
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
# x86 ubuntu
set(GE_SYS_ARCH "x86_64")
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
# arm euleros
set(GE_SYS_ARCH "aarch64")
else()
message(FATAL_ERROR "Running on a unsupported architecture: ${SYSTEM_TYPE}, build terminated")
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(mmpa libmmpa.so ${GE_LIB_PATH})
find_module(msprof libmsprof.so ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
find_module(adump_server libadump_server.a ${GE_LIB_PATH})
find_module(runtime libruntime.so ${GE_LIB_PATH})
find_module(runtime_compile libruntime_compile.so ${GE_LIB_PATH})
find_module(resource libresource.so ${GE_LIB_PATH})
find_module(error_manager liberror_manager.so ${GE_LIB_PATH})
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
# for CPU/GPU mode, find c_sec and slog from local prebuild
if(NOT ENABLE_D AND NOT GE_ONLY)
set(GE_PREBUILD_PATH ${GE_SOURCE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
find_library(slog libslog.so ${GE_PREBUILD_PATH})
# if D_LINK_PATH is set in environment variables, search libraries in given path
elseif(DEFINED ENV{D_LINK_PATH})
# D_LINK_PATH is set
set(GE_LIB_PATH $ENV{D_LINK_PATH})
set(GE_SYS_ARCH "")
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
# x86 ubuntu
set(GE_SYS_ARCH "x86_64")
elseif(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
# arm euleros
set(GE_SYS_ARCH "aarch64")
else()
message(FATAL_ERROR "Running on a unsupported architecture: ${SYSTEM_TYPE}, build terminated")
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
find_library(slog libslog.so ${GE_LIB_PATH})
find_library(mmpa libmmpa.so ${GE_LIB_PATH})
find_library(runtime libruntime.so ${GE_LIB_PATH})
find_library(msprof libmsprofiler.a ${GE_LIB_PATH})
find_library(register libregister.so ${GE_LIB_PATH})
find_library(hccl libhccl.so ${GE_LIB_PATH})
find_library(resource libresource.so ${GE_LIB_PATH})
find_library(error_manager liberror_manager.so ${GE_LIB_PATH})
find_library(adump_server libadump_server.a ${GE_LIB_PATH})
else()
# Ascend mode
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH})
else()
if(DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_DIR $ENV{ASCEND_CUSTOM_PATH})
else()
set(ASCEND_DIR /usr/local/Ascend)
endif()
set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64)
set(ASCEND_DRIVER_COMMON_DIR ${ASCEND_DIR}/driver/lib64/common)
set(ASCEND_DRIVER_SHARE_DIR ${ASCEND_DIR}/driver/lib64/share)
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
set(ASCEND_ATC_DIR ${ASCEND_DIR}/atc/lib64)
set(ASCEND_ACL_DIR ${ASCEND_DIR}/acllib/lib64)
set(STATIC_ACL_LIB ${ASCEND_ACL_DIR})
find_module(slog libslog.so ${ASCEND_ATC_DIR})
find_module(mmpa libmmpa.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
find_module(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_module(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_RUNTIME_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
if(PRODUCT STREQUAL "flr3")
message(FATAL_ERROR "This platform is not supported in train mode, build terminated")
endif()
elseif(PLATFORM STREQUAL "inference")
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR})
elseif(PRODUCT STREQUAL "flr1")
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
elseif(PRODUCT STREQUAL "flr2")
# flr2 ascend_hal_stub limsprof ?
else()
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR})
find_module(msprof libmsprof.so ${ASCEND_DRIVER_DIR})
endif()
elseif(PLATFORM STREQUAL "all")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_module(adump_server libadump_server.a ${ASCEND_ACL_DIR})
find_module(runtime libruntime.so ${ASCEND_ACL_DIR})
find_module(runtime_compile libruntime_compile.so ${ASCEND_ATC_DIR})
find_module(resource libresource.so ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
else()
message(FATAL_ERROR "PLATFORM param is invalid, should be train or inference, build terminated")
endif()
set(ASCEND_DIR /usr/local/Ascend)
endif()
set(ASCEND_DRIVER_DIR ${ASCEND_DIR}/driver/lib64/common)
set(ASCEND_RUNTIME_DIR ${ASCEND_DIR}/fwkacllib/lib64)
find_library(slog libslog.so ${ASCEND_DRIVER_DIR})
find_library(mmpa libmmpa.so ${ASCEND_DRIVER_DIR})
find_library(msprof libmsprofiler.a ${ASCEND_RUNTIME_DIR})

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/parser)
set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
find_library(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
find_library(runtime libruntime.so ${ASCEND_RUNTIME_DIR})
find_library(register libregister.so ${ASCEND_RUNTIME_DIR})
find_library(resource libresource.so ${ASCEND_RUNTIME_DIR})
find_library(error_manager liberror_manager.so ${ASCEND_RUNTIME_DIR})
find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_DIR})
endif()

add_subdirectory(metadef)
add_subdirectory(parser)
#add_subdirectory(metadef/graph)
#add_subdirectory(metadef/register)
# add compile flags
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
message("Build in Debug mode")
set(CMAKE_C_FLAGS "-O0 -g -Wall -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe -fPIC ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-O0 -g -Wall -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe -fPIC ${CMAKE_CXX_FLAGS}")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -rdynamic")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic")
endif()
else()
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/../metadef)
set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/../parser)
set(GE_DEPEND_DIR ${CMAKE_CURRENT_LIST_DIR}/..)
set(CMAKE_C_FLAGS "-O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_C_FLAGS}")
set(CMAKE_CXX_FLAGS "-O2 -Wall -fPIC -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack -pipe ${CMAKE_CXX_FLAGS}")
endif ()

# force __FILE__ to show relative path of file, from source directory, as cmake project makes __FILE__ absolute directory
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__FILE__='\"$(subst $(realpath ${CMAKE_SOURCE_DIR})/,,$(abspath $<))\"' -Wno-builtin-macro-redefined")

# compile libraries from following directories
# libgraph is compiled in any situation
add_subdirectory(${GE_SOURCE_DIR}/src/common/graph)
if(ENABLE_D)
# if MindSpore compiles in D mode, compile the following libraries
add_subdirectory(${GE_SOURCE_DIR}/src/ge/common)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/ge_runtime)
elseif(GE_ONLY)
# standalone GraphEngine compiles all following libraries
add_subdirectory(${GE_SOURCE_DIR}/src/ge/common)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/ge_runtime)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/ge_local_engine)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/graph/build/memory)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/)
add_subdirectory(${GE_SOURCE_DIR}/src/ge/plugin/engine)
endif()

add_subdirectory(ge)
# if (ENABLE_GE_COV OR ENABLE_GE_UT OR ENABLE_GE_ST)
# add_subdirectory(tests)
# endif()


build.sh (+24 -100)

@@ -23,7 +23,7 @@ export BUILD_PATH="${BASEPATH}/build/"
usage()
{
echo "Usage:"
echo "sh build.sh [-j[n]] [-h] [-v] [-s] [-t] [-u] [-c] [-p]"
echo "sh build.sh [-j[n]] [-h] [-v] [-s] [-t] [-u] [-c]"
echo ""
echo "Options:"
echo " -h Print usage"
@@ -32,7 +32,6 @@ usage()
echo " -j[n] Set the number of threads used for building GraphEngine, default is 8"
echo " -t Build and execute ut"
echo " -c Build ut with coverage tag"
echo " -p Build inference or train"
echo " -v Display build command"
echo "to be continued ..."
}
@@ -47,10 +46,8 @@ checkopts()
ENABLE_GE_ST="off"
ENABLE_GE_COV="off"
GE_ONLY="on"
PLATFORM="inference"
PRODUCT="normal"
# Process the options
while getopts 'ustchj:p:g:v' opt
while getopts 'ustchj:v' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@@ -80,12 +77,6 @@ checkopts()
v)
VERBOSE="VERBOSE=1"
;;
p)
PLATFORM=$OPTARG
;;
g)
PRODUCT=$OPTARG
;;
*)
echo "Undefined option: ${opt}"
usage
@@ -95,9 +86,6 @@ checkopts()
}
checkopts "$@"

git submodule update --init metadef
git submodule update --init parser

mk_dir() {
local create_dir="$1" # the target to make

@@ -112,8 +100,8 @@ echo "---------------- GraphEngine build start ----------------"
build_graphengine()
{
echo "create build directory and build GraphEngine";
mk_dir "${BUILD_PATH}"
cd "${BUILD_PATH}"
mk_dir "${BUILD_PATH}/graphengine"
cd "${BUILD_PATH}/graphengine"
CMAKE_ARGS="-DBUILD_PATH=$BUILD_PATH -DGE_ONLY=$GE_ONLY"

if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
@@ -129,42 +117,17 @@ build_graphengine()
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_GE_ST=ON"
fi

CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_OPEN_SRC=True -DCMAKE_INSTALL_PREFIX=${OUTPUT_PATH} -DPLATFORM=${PLATFORM} -DPRODUCT=${PRODUCT}"
echo "${CMAKE_ARGS}"
cmake ${CMAKE_ARGS} ..
if [ $? -ne 0 ]
then
echo "execute command: cmake ${CMAKE_ARGS} .. failed."
return 1
fi
COMMON_TARGET="ge_common engine fmk_parser parser_common _caffe_parser fmk_onnx_parser graph register engine_conf.json optimizer_priority.pbtxt "
TARGET=${COMMON_TARGET}
if [ "x${PLATFORM}" = "xtrain" ]
then
TARGET="ge_runner ge_local_engine ge_local_opskernel_builder host_cpu_engine host_cpu_opskernel_builder ${TARGET}"
elif [ "x${PLATFORM}" = "xinference" ]
then
TARGET="ge_compiler atc_ge_local_engine atc_ge_local_opskernel_builder atc_host_cpu_engine atc_host_cpu_opskernel_builder atc opensrc_ascendcl ${TARGET}"
elif [ "x${PLATFORM}" = "xall" ]
then
# build all the target
TARGET=""
fi
make ${VERBOSE} ${TARGET} -j${THREAD_NUM} && make install
if [ $? -ne 0 ]
then
echo "execute command: make ${VERBOSE} -j${THREAD_NUM} && make install failed."
return 1
fi
cmake ${CMAKE_ARGS} ../..
make ${VERBOSE} -j${THREAD_NUM}
echo "GraphEngine build success!"
}
g++ -v
mk_dir ${OUTPUT_PATH}
build_graphengine || { echo "GraphEngine build failed."; return; }
build_graphengine
echo "---------------- GraphEngine build finished ----------------"
#cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}"
#rm -rf "${OUTPUT_PATH}/"libproto*
mk_dir ${OUTPUT_PATH}
cp -rf "${BUILD_PATH}/graphengine/"*.so "${OUTPUT_PATH}"
rm -rf "${OUTPUT_PATH}/"libproto*
rm -f ${OUTPUT_PATH}/libgmock*.so
rm -f ${OUTPUT_PATH}/libgtest*.so
rm -f ${OUTPUT_PATH}/lib*_stub.so
@@ -212,82 +175,43 @@ echo "---------------- GraphEngine output generated ----------------"
generate_package()
{
cd "${BASEPATH}"

GRAPHENGINE_LIB_PATH="lib"
ACL_PATH="acllib/lib64"
FWK_PATH="fwkacllib/lib64"
ATC_PATH="atc/lib64"
ATC_BIN_PATH="atc/bin"
NNENGINE_PATH="plugin/nnengine/ge_config"
OPSKERNEL_PATH="plugin/opskernel"

ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so" "libregister.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so" "libregister.so")
PLUGIN_OPSKERNEL=("libge_local_engine.so" "libge_local_opskernel_builder.so" "libhost_cpu_engine.so" "libhost_cpu_opskernel_builder.so" "optimizer_priority.pbtxt")
PARSER_LIB=("lib_caffe_parser.so" "libfmk_onnx_parser.so" "libfmk_parser.so" "libparser_common.so")
ATC_LIB=("libc_sec.so" "libge_common.so" "libge_compiler.so" "libgraph.so")
FWK_LIB=("libge_common.so" "libge_runner.so" "libgraph.so")

rm -rf ${OUTPUT_PATH:?}/${FWK_PATH}/
rm -rf ${OUTPUT_PATH:?}/${ACL_PATH}/
rm -rf ${OUTPUT_PATH:?}/${ATC_PATH}/
rm -rf ${OUTPUT_PATH:?}/${ATC_BIN_PATH}/

mk_dir "${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}"
mk_dir "${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_PATH}/${OPSKERNEL_PATH}"
mk_dir "${OUTPUT_PATH}/${ACL_PATH}"
mk_dir "${OUTPUT_PATH}/${ATC_BIN_PATH}"
cd "${OUTPUT_PATH}"

find ./ -name graphengine_lib.tar -exec rm {} \;
find output/ -name graphengine_lib.tar -exec rm {} \;
cp src/ge/engine_manager/engine_conf.json ${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}
cp src/ge/engine_manager/engine_conf.json ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}

cp ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH}/engine_conf.json ${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}
cp ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH}/engine_conf.json ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}
find output/ -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}/../ \;
find output/ -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \;

find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${NNENGINE_PATH}/../ \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name libengine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${NNENGINE_PATH}/../ \;
find output/ -maxdepth 1 -name libge_local_engine.so -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \;
find output/ -maxdepth 1 -name libge_local_engine.so -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${OPSKERNEL_PATH} \;

MAX_DEPTH=1
if [ "x${PLATFORM}" = "xall" ] || [ "x${PLATFORM}" = "xinference" ]
then
MAX_DEPTH=2
fi
for lib in "${PLUGIN_OPSKERNEL[@]}";
do
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH}/${OPSKERNEL_PATH} \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth ${MAX_DEPTH} -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH}/${OPSKERNEL_PATH} \;
done

for lib in "${PARSER_LIB[@]}";
cd "${OUTPUT_PATH}"
for lib in "${ATC_LIB[@]}";
do
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH} \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH} \;
cp "$lib" "${OUTPUT_PATH}/${ATC_PATH}"
done

for lib in "${FWK_LIB[@]}";
do
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${FWK_PATH} \;
cp "$lib" "${OUTPUT_PATH}/${FWK_PATH}"
done

for lib in "${ATC_LIB[@]}";
do
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "$lib" -exec cp -f {} ${OUTPUT_PATH}/${ATC_PATH} \;
done

find ./bin -name atc -exec cp {} "${OUTPUT_PATH}/${ATC_BIN_PATH}" \;
find ${OUTPUT_PATH}/${GRAPHENGINE_LIB_PATH} -maxdepth 1 -name "libascendcl.so" -exec cp -f {} ${OUTPUT_PATH}/${ACL_PATH} \;
if [ "x${PLATFORM}" = "xtrain" ]
then
tar -cf graphengine_lib.tar fwkacllib
elif [ "x${PLATFORM}" = "xinference" ]
then
tar -cf graphengine_lib.tar acllib atc
elif [ "x${PLATFORM}" = "xall" ]
then
tar -cf graphengine_lib.tar fwkacllib acllib atc
fi
tar -cf graphengine_lib.tar fwkacllib/ atc/
}

if [[ "X$ENABLE_GE_UT" = "Xoff" ]]; then


cmake/FindModule.cmake (+0 -23)

@@ -1,23 +0,0 @@
#[[
module - the name of export imported target
name - find the library name
path - find the library path
#]]
function(find_module module name path)
if (TARGET ${module})
return()
endif()
find_library(${module}_LIBRARY_DIR NAMES ${name} NAMES_PER_DIR PATHS ${path}
PATH_SUFFIXES lib
)

message(STATUS "find ${name} location ${${module}_LIBRARY_DIR}")
if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
message(FATAL_ERROR "${name} not found in ${path}")
endif()
add_library(${module} SHARED IMPORTED)
set_target_properties(${module} PROPERTIES
IMPORTED_LOCATION ${${module}_LIBRARY_DIR}
)
endfunction()
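
For reference, a typical call to the helper removed here mirrors the find_module() invocations still visible in CMakeLists.txt above; a minimal sketch, assuming a hypothetical consumer target:

    # create an imported target 'slog' from libslog.so found under the given path
    find_module(slog libslog.so /usr/local/Ascend/atc/lib64)
    # link it from a placeholder target; 'my_component' is hypothetical
    target_link_libraries(my_component PRIVATE slog)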

cmake/external_libs/eigen.cmake (+22 -0)

@@ -0,0 +1,22 @@
set(Eigen3_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
set(Eigen3_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
set(Eigen3_NS "ge_")

if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/eigen-git-mirrorsource/repository/archive/3.3.7.tar.gz")
set(MD5 "cf6552a5d90c1aca4b5e0b011f65ea93")
else()
set(REQ_URL "https://gitlab.com/libeigen/eigen/-/archive/3.3.7/eigen-3.3.7.tar.gz")
set(MD5 "9e30f67e8531477de4117506fe44669b")
endif ()

graphengine_add_pkg(Eigen3
VER 3.3.7
URL ${REQ_URL}
MD5 ${MD5}
CMAKE_OPTION -DBUILD_TESTING=OFF)

find_package(Eigen3 3.3.7 REQUIRED ${GE_FIND_NO_DEFAULT_PATH})
set_property(TARGET Eigen3::Eigen PROPERTY IMPORTED_GLOBAL TRUE)
add_library(graphengine::eigen ALIAS Eigen3::Eigen)
include_directories(${EIGEN3_INCLUDE_DIRS})
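
Once this script has run, downstream targets can link the alias it defines; a minimal sketch with a hypothetical target (Eigen is header-only, so linking only propagates include paths):

    add_library(my_math_lib STATIC my_math.cc)   # hypothetical target and source
    target_link_libraries(my_math_lib PRIVATE graphengine::eigen)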

cmake/external_libs/gflags.cmake (+0 -39)

@@ -1,39 +0,0 @@
if (HAVE_GFLAGS)
return()
endif()

include(ExternalProject)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

ExternalProject_Add(gflags_build
URL https://github.com/gflags/gflags/archive/v2.2.2.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/gflags/src/gflags-2.2.2
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)

set(GFLAGS_PKG_DIR ${CMAKE_INSTALL_PREFIX}/gflags)

add_library(gflags_static STATIC IMPORTED)

set_target_properties(gflags_static PROPERTIES
IMPORTED_LOCATION ${GFLAGS_PKG_DIR}/lib/libgflags.a
)

add_library(gflags INTERFACE)
target_include_directories(gflags INTERFACE ${GFLAGS_PKG_DIR}/include)
target_link_libraries(gflags INTERFACE gflags_static)

add_dependencies(gflags gflags_build)

#set(HAVE_GFLAGS TRUE CACHE BOOL "gflags build add")
set(HAVE_GFLAGS TRUE)

cmake/external_libs/gtest.cmake (+24 -0)

@@ -0,0 +1,24 @@
set(ge_gtest_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
set(ge_gtest_CFLAGS "-D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")

if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "89e13ca1aa48d370719d58010b83f62c")
else()
set(REQ_URL "https://github.com/google/googletest/archive/release-1.8.0.tar.gz")
set(MD5 "16877098823401d1bf2ed7891d7dce36")
endif ()

graphengine_add_pkg(ge_gtest
VER 1.8.0
LIBS gtest gtest_main
URL ${REQ_URL}
MD5 ${MD5}
CMAKE_OPTION -DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON
-DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON)

add_library(graphengine::gtest ALIAS ge_gtest::gtest)
add_library(graphengine::gtest_main ALIAS ge_gtest::gtest_main)
include_directories(${ge_gtest_INC})
file(COPY ${ge_gtest_INC}/../lib/libgtest.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine)
file(COPY ${ge_gtest_INC}/../lib/libgtest_main.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine)
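
A unit-test executable wired to the aliases defined above could look like this sketch (target and source names are hypothetical):

    add_executable(ge_ut my_test.cc)   # hypothetical test source
    target_link_libraries(ge_ut PRIVATE graphengine::gtest graphengine::gtest_main)
    add_test(NAME ge_ut COMMAND ge_ut)   # register with CTest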

cmake/external_libs/json.cmake (+18 -22)

@@ -1,24 +1,20 @@
if (HAVE_JSON)
return()
endif()
set(nlohmann_json_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
set(nlohmann_json_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")

include(ExternalProject)
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
set(INCLUDE "./include")
else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(INCLUDE "./")
endif ()

set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
ExternalProject_Add(json_build
URL https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
EXCLUDE_FROM_ALL TRUE
)


add_library(json INTERFACE)
target_include_directories(json INTERFACE ${JSON_SRC_DIR})
add_dependencies(json json_build)

#set(HAVE_JSON TRUE CACHE BOOL "json build add")
set(HAVE_JSON TRUE)
graphengine_add_pkg(ge_nlohmann_json
VER 3.6.1
HEAD_ONLY ${INCLUDE}
URL ${REQ_URL}
MD5 ${MD5})
include_directories(${ge_nlohmann_json_INC})
add_library(graphengine::json ALIAS ge_nlohmann_json)

cmake/external_libs/onnx.cmake (+13 -29)

@@ -1,29 +1,13 @@
include(ExternalProject)

#set(ONNX_SRC_DIR /home/txd/workspace/cloud_code/graphengine/build/graphengine/open_source/onnx)
#set(ONNX_PROTO ${ONNX_SRC_DIR}/onnx/onnx.proto)
set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

ExternalProject_Add(onnx
URL https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy <SOURCE_DIR>/onnx/onnx.proto ${ONNX_PROTO_FILE}
#BUILD_ALWAYS TRUE
EXCLUDE_FROM_ALL TRUE
)

macro(onnx_protobuf_generate comp c_var h_var)
add_custom_command(OUTPUT ${ONNX_PROTO_FILE}
DEPENDS onnx
)
ge_protobuf_generate(${comp} ${c_var} ${h_var} ${ONNX_PROTO_FILE})
endmacro()


if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
set(REQ_URL "https://github.com/onnx/onnx/releases/download/v1.6.0/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
endif ()

graphengine_add_pkg(onnx
VER 1.6.0
HEAD_ONLY ./
URL ${REQ_URL}
MD5 ${MD5})
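
Because the archive is registered HEAD_ONLY, the bundled proto definition can be handed straight to the protobuf helper from protobuf.cmake; a hypothetical sketch:

    # generate C++ sources from the onnx.proto shipped inside the unpacked archive
    ge_protobuf_generate(ge ONNX_SRCS ONNX_HDRS ${onnx_INC}/onnx/onnx.proto)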

cmake/external_libs/protobuf.cmake (+63 -0)

@@ -0,0 +1,63 @@
if (NOT TARGET protobuf::protobuf)
set(protobuf_USE_STATIC_LIBS ON)
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
set(_ge_tmp_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE " -Wall" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE " -Werror" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/protobuf_source/repository/archive/v3.8.0.tar.gz")
set(MD5 "eba86ae9f07ba5cfbaf8af3bc4e84236")
else()
set(REQ_URL "https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz")
set(MD5 "3d9e32700639618a4d2d342c99d4507a")
endif ()

graphengine_add_pkg(protobuf
VER 3.8.0
LIBS protobuf
EXE protoc
URL ${REQ_URL}
MD5 ${MD5}
CMAKE_PATH ../cmake/
CMAKE_OPTION -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_BUILD_SHARED_LIBS=OFF)
set(CMAKE_CXX_FLAGS ${_ge_tmp_CMAKE_CXX_FLAGS})
endif()
add_library(graphengine::protobuf ALIAS protobuf::protobuf)
set(PROTOBUF_LIBRARY protobuf::protobuf)
include_directories(${protobuf_INC})
include_directories(${protobuf_DIRPATH}/src)

function(ge_protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: ge_protobuf_generate() called without any proto files")
return()
endif()

set(${c_var})
set(${h_var})

foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)

list(APPEND ${c_var} "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.cc")
list(APPEND ${h_var} "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.h")

add_custom_command(
OUTPUT "${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.cc"
"${CMAKE_BINARY_DIR}/proto/${comp}/proto/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_BINARY_DIR}/proto/${comp}/proto"
COMMAND protobuf::protoc -I${file_dir} --cpp_out=${CMAKE_BINARY_DIR}/proto/${comp}/proto ${abs_file}
DEPENDS protobuf::protoc ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()

set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)

endfunction()
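
ge_protobuf_generate() takes a component name, two output variables, and a list of .proto files; the generated sources are then compiled like any other target. A minimal sketch with hypothetical file and target names:

    ge_protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${CMAKE_CURRENT_LIST_DIR}/proto/task.proto)
    add_library(ge_proto_objs OBJECT ${PROTO_SRCS})   # hypothetical target
    # generated headers land under ${CMAKE_BINARY_DIR}/proto/<comp>/proto/
    target_include_directories(ge_proto_objs PRIVATE ${CMAKE_BINARY_DIR}/proto/ge)
    target_link_libraries(ge_proto_objs PRIVATE graphengine::protobuf)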

cmake/external_libs/protobuf_shared.cmake (+0 -59)

@@ -1,59 +0,0 @@
if (HAVE_PROTOBUF)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
#DOWNLOAD_COMMAND ${CMAKE_COMMAND} -E copy_directory ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0 <SOURCE_DIR>
#CONFIGURE_COMMAND ${CMAKE_COMMAND}
#-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
#-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
#-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
#-DCMAKE_LINKER=${CMAKE_LINKER}
#-DCMAKE_AR=${CMAKE_AR}
#-DCMAKE_RANLIB=${CMAKE_RANLIB}
#-Dprotobuf_WITH_ZLIB=OFF
#-Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protobuf <SOURCE_DIR>/cmake
CONFIGURE_COMMAND cd <SOURCE_DIR>
&& ./autogen.sh && cd <BINARY_DIR> && <SOURCE_DIR>/configure --prefix=${CMAKE_INSTALL_PREFIX}/protobuf --with-zlib=no CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER} CXXFLAGS=${protobuf_CXXFLAGS} LDFLAGS=${protobuf_LDFLAGS}
&& bash -c "sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=\"\"|g' libtool && sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool"
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
include(GNUInstallDirs)

set(PROTOBUF_SHARED_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protobuf)

add_library(protobuf SHARED IMPORTED)

file(MAKE_DIRECTORY ${PROTOBUF_SHARED_PKG_DIR}/include)

set_target_properties(protobuf PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so
)

target_include_directories(protobuf INTERFACE ${PROTOBUF_SHARED_PKG_DIR}/include)

set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(FILES ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so ${PROTOBUF_SHARED_PKG_DIR}/lib/libprotobuf.so.19.0.0 OPTIONAL
DESTINATION ${INSTALL_LIBRARY_DIR})

add_dependencies(protobuf protobuf_build)

#set(HAVE_PROTOBUF TRUE CACHE BOOL "protobuf build add")
set(HAVE_PROTOBUF TRUE)

cmake/external_libs/protobuf_static.cmake (+0 -43)

@@ -1,43 +0,0 @@
include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
set(PROTOBUF_STATIC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protobuf_static)
ExternalProject_Add(protobuf_static_build
URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}
-DCMAKE_LINKER=${CMAKE_LINKER}
-DCMAKE_AR=${CMAKE_AR}
-DCMAKE_RANLIB=${CMAKE_RANLIB}
-Dprotobuf_WITH_ZLIB=OFF
-Dprotobuf_BUILD_TESTS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_STATIC_PKG_DIR} <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
include(GNUInstallDirs)

add_library(protobuf_static_lib STATIC IMPORTED)

set_target_properties(protobuf_static_lib PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_STATIC_PKG_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a
)

add_library(protobuf_static INTERFACE)
target_include_directories(protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include)
target_link_libraries(protobuf_static INTERFACE protobuf_static_lib)

add_dependencies(protobuf_static protobuf_static_build)

cmake/external_libs/protoc.cmake (+0 -103)

@@ -1,103 +0,0 @@
if (HAVE_PROTOC)
return()
endif()
include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()
set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fstack-protector-all -D_FORTIFY_SOURCE=2 -D_GLIBCXX_USE_CXX11_ABI=0 -O2")
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protoc_build
URL https://github.com/protocolbuffers/protobuf/archive/v3.8.0.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)
set(PROTOC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/protoc)
set(protoc_EXECUTABLE ${PROTOC_PKG_DIR}/${CMAKE_INSTALL_BINDIR}/protoc)
function(protobuf_generate comp c_var h_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate() called without any proto files")
return()
endif()
set(${c_var})
set(${h_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${c_var} "${proto_output_path}/${file_name}.pb.cc")
list(APPEND ${h_var} "${proto_output_path}/${file_name}.pb.h")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}.pb.cc" "${proto_output_path}/${file_name}.pb.h"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --cpp_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running C++ protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${c_var}} ${${h_var}} PROPERTIES GENERATED TRUE)
set(${c_var} ${${c_var}} PARENT_SCOPE)
set(${h_var} ${${h_var}} PARENT_SCOPE)
endfunction()
function(protobuf_generate_py comp py_var)
if(NOT ARGN)
message(SEND_ERROR "Error: protobuf_generate_py() called without any proto files")
return()
endif()
set(${py_var})
foreach(file ${ARGN})
get_filename_component(abs_file ${file} ABSOLUTE)
get_filename_component(file_name ${file} NAME_WE)
get_filename_component(file_dir ${abs_file} PATH)
get_filename_component(parent_subdir ${file_dir} NAME)
if("${parent_subdir}" STREQUAL "proto")
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto)
else()
set(proto_output_path ${CMAKE_BINARY_DIR}/proto/${comp}/proto/${parent_subdir})
endif()
list(APPEND ${py_var} "${proto_output_path}/${file_name}_pb2.py")
add_custom_command(
OUTPUT "${proto_output_path}/${file_name}_pb2.py"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory "${proto_output_path}"
COMMAND ${protoc_EXECUTABLE} -I${file_dir} --python_out=${proto_output_path} ${abs_file}
DEPENDS protoc_build ${abs_file}
COMMENT "Running PYTHON protocol buffer compiler on ${file}" VERBATIM )
endforeach()
set_source_files_properties(${${py_var}} PROPERTIES GENERATED TRUE)
set(${py_var} ${${py_var}} PARENT_SCOPE)
endfunction()
#set(HAVE_PROTOC TRUE CACHE BOOL "protoc build add")
set(HAVE_PROTOC TRUE)
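
Before its removal, the Python helper above was called the same way as the C++ one; a hypothetical example:

    # generate task_pb2.py and force it to be built by default
    protobuf_generate_py(ge GE_PY_SRCS ${CMAKE_CURRENT_LIST_DIR}/proto/task.proto)
    add_custom_target(ge_proto_py ALL DEPENDS ${GE_PY_SRCS})   # hypothetical target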

cmake/external_libs/securec.cmake (+11 -62)

@@ -1,62 +1,11 @@
if (HAVE_C_SEC)
return()
endif()

include(ExternalProject)

if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
(${CMAKE_INSTALL_PREFIX} STREQUAL "C:/Program Files (x86)/ascend"))
set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output CACHE STRING "path for install()" FORCE)
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

ExternalProject_Add(c_sec_build
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_LINKER=${CMAKE_LINKER}
-DCMAKE_AR=${CMAKE_AR}
-DCMAKE_RANLIB=${CMAKE_RANLIB}
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/c_sec <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE
)

set(C_SEC_PKG_DIR ${CMAKE_INSTALL_PREFIX}/c_sec)

add_library(c_sec SHARED IMPORTED)

file(MAKE_DIRECTORY ${C_SEC_PKG_DIR}/include)

set_target_properties(c_sec PROPERTIES
IMPORTED_LOCATION ${C_SEC_PKG_DIR}/lib/libc_sec.so
)

target_include_directories(c_sec INTERFACE ${C_SEC_PKG_DIR}/include)

add_dependencies(c_sec c_sec_build)

set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(FILES ${C_SEC_PKG_DIR}/lib/libc_sec.so OPTIONAL
DESTINATION ${INSTALL_LIBRARY_DIR})

add_library(c_sec_static_lib STATIC IMPORTED)
set_target_properties(c_sec_static_lib PROPERTIES
IMPORTED_LOCATION ${C_SEC_PKG_DIR}/lib/libc_sec.a
)

add_library(c_sec_static INTERFACE)
target_include_directories(c_sec_static INTERFACE ${C_SEC_PKG_DIR}/include)
target_link_libraries(c_sec_static INTERFACE c_sec_static_lib)

add_dependencies(c_sec_static c_sec_build)

#set(HAVE_C_SEC TRUE CACHE BOOL "c_sec build add")
set(HAVE_C_SEC TRUE)
graphengine_add_pkg(securec
VER 1.1.10
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
MD5 193f0ca5246c1dd84920db34d2d8249f
LIBS c_sec
PATCHES ${GE_SOURCE_DIR}/third_party/patch/securec/securec.patch001
CMAKE_OPTION "-DCMAKE_BUILD_TYPE=Release"
)
include_directories(${securec_INC})
file(COPY ${securec_INC}/../lib/libc_sec.so DESTINATION ${CMAKE_SOURCE_DIR}/build/graphengine)
add_library(graphengine::securec ALIAS securec::c_sec)
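
Consumers then depend on the alias instead of the hand-rolled imported targets deleted above; a sketch with a hypothetical target:

    add_library(my_util STATIC my_util.cc)   # hypothetical target and source
    target_link_libraries(my_util PRIVATE graphengine::securec)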

cmake/ge_utils.cmake (+371 -0)

@@ -0,0 +1,371 @@
include(FetchContent)
set(FETCHCONTENT_QUIET OFF)

function(graphengine_add_submodule_obj des_submodule_objs sub_dir submodule_name_obj)

add_subdirectory(${sub_dir})

if(NOT TARGET ${submodule_name_obj})
message(FATAL_ERROR "Can not find submodule '${submodule_name_obj}'. in ${CMAKE_CURRENT_LIST_FILE}")
endif()
if("$<TARGET_OBJECTS:${submodule_name_obj}>" IN_LIST ${des_submodule_objs})
message(FATAL_ERROR "submodule '${submodule_name_obj}' added more than once. in ${CMAKE_CURRENT_LIST_FILE}")
endif()

set(${des_submodule_objs} ${${des_submodule_objs}} $<TARGET_OBJECTS:${submodule_name_obj}> PARENT_SCOPE)

endfunction()

if (DEFINED ENV{MSLIBS_CACHE_PATH})
set(_MS_LIB_CACHE $ENV{MSLIBS_CACHE_PATH})
else()
set(_MS_LIB_CACHE ${CMAKE_BINARY_DIR}/.mslib)
endif ()
message("MS LIBS CACHE PATH: ${_MS_LIB_CACHE}")

if (NOT EXISTS ${_MS_LIB_CACHE})
file(MAKE_DIRECTORY ${_MS_LIB_CACHE})
endif ()

if (DEFINED ENV{MSLIBS_SERVER})
set(LOCAL_LIBS_SERVER $ENV{MSLIBS_SERVER})
message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}")
endif ()

include(ProcessorCount)
ProcessorCount(N)
if (JOBS)
set(THNUM ${JOBS})
else()
set(JOBS 8)
if (${JOBS} GREATER ${N})
set(THNUM ${N})
endif()
endif ()
message("set make thread num: ${THNUM}")

if(LOCAL_LIBS_SERVER)
if (NOT ENV{no_proxy})
set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}")
else()
string(FIND $ENV{no_proxy} ${LOCAL_LIBS_SERVER} IP_POS)
if (${IP_POS} EQUAL -1)
set(ENV{no_proxy} "$ENV{no_proxy},${LOCAL_LIBS_SERVER}")
endif ()
endif ()
endif()

function(__download_pkg pkg_name pkg_url pkg_md5)

if(LOCAL_LIBS_SERVER)
get_filename_component(_URL_FILE_NAME ${pkg_url} NAME)
set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${_URL_FILE_NAME}" ${pkg_url})
endif()

FetchContent_Declare(
${pkg_name}
URL ${pkg_url}
URL_HASH MD5=${pkg_md5}
)
FetchContent_GetProperties(${pkg_name})
message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}")
if(NOT ${pkg_name}_POPULATED)
FetchContent_Populate(${pkg_name})
set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE)
endif()

endfunction()

function(__download_pkg_with_git pkg_name pkg_url pkg_git_commit pkg_md5)

if(LOCAL_LIBS_SERVER)
set(pkg_url "http://${LOCAL_LIBS_SERVER}:8081/libs/${pkg_name}/${pkg_git_commit}")
FetchContent_Declare(
${pkg_name}
URL ${pkg_url}
URL_HASH MD5=${pkg_md5}
)
else()
FetchContent_Declare(
${pkg_name}
GIT_REPOSITORY ${pkg_url}
GIT_TAG ${pkg_git_commit})
endif()
FetchContent_GetProperties(${pkg_name})
message("download: ${${pkg_name}_SOURCE_DIR} , ${pkg_name} , ${pkg_url}")
if(NOT ${pkg_name}_POPULATED)
FetchContent_Populate(${pkg_name})
set(${pkg_name}_SOURCE_DIR ${${pkg_name}_SOURCE_DIR} PARENT_SCOPE)
endif()

endfunction()


function(__find_pkg_then_add_target pkg_name pkg_exe)

unset(${pkg_name}_LIBS)

message("_FIND:${${pkg_name}_BASE_DIR}")

if(pkg_exe)
find_program(${pkg_exe}_EXE ${pkg_exe} PATHS ${${pkg_name}_BASE_DIR}/bin NO_DEFAULT_PATH)
if(NOT ${pkg_exe}_EXE)
return()
endif()
add_executable(${pkg_name}::${pkg_exe} IMPORTED GLOBAL)
set_target_properties(${pkg_name}::${pkg_exe} PROPERTIES
IMPORTED_LOCATION ${${pkg_exe}_EXE}
)
message("found ${${pkg_exe}_EXE}")
endif()

foreach(_LIB_NAME ${ARGN})
set(_LIB_SEARCH_NAME ${_LIB_NAME})
set(_LIB_TYPE SHARED)
if (${pkg_name}_USE_STATIC_LIBS)
set(_LIB_SEARCH_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(_LIB_TYPE STATIC)
endif ()
set(${_LIB_NAME}_LIB ${_LIB_NAME}_LIB-NOTFOUND)
find_library(${_LIB_NAME}_LIB ${_LIB_SEARCH_NAME} PATHS ${${pkg_name}_BASE_DIR}/lib NO_DEFAULT_PATH)
if(NOT ${_LIB_NAME}_LIB)
return()
endif()
add_library(${pkg_name}::${_LIB_NAME} ${_LIB_TYPE} IMPORTED GLOBAL)
set_target_properties(${pkg_name}::${_LIB_NAME} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${${pkg_name}_BASE_DIR}/include"
IMPORTED_LOCATION ${${_LIB_NAME}_LIB}
)
list(APPEND ${pkg_name}_LIBS ${pkg_name}::${_LIB_NAME})
message("found ${${_LIB_NAME}_LIB}")
STRING( REGEX REPLACE "(.+)/(.+)" "\\1" LIBPATH ${${_LIB_NAME}_LIB})
set(${pkg_name}_LIBPATH ${LIBPATH} CACHE STRING INTERNAL)
endforeach(_LIB_NAME)

set(${pkg_name}_LIBS ${${pkg_name}_LIBS} PARENT_SCOPE)
endfunction()

function(__exec_cmd)
set(options )
set(oneValueArgs WORKING_DIRECTORY)
set(multiValueArgs COMMAND)

cmake_parse_arguments(EXEC "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )

execute_process(COMMAND ${EXEC_COMMAND}
WORKING_DIRECTORY ${EXEC_WORKING_DIRECTORY}
RESULT_VARIABLE RESULT)
if(NOT RESULT EQUAL "0")
message(FATAL_ERROR "error! when ${EXEC_COMMAND} in ${EXEC_WORKING_DIRECTORY}")
endif()
endfunction()

function(__check_patches pkg_patches)
# check patches
if (PKG_PATCHES)
file(TOUCH ${_MS_LIB_CACHE}/${pkg_name}_patch.md5)
file(READ ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${pkg_name}_PATCHES_MD5)

message("patches md5:${${pkg_name}_PATCHES_MD5}")

set(${pkg_name}_PATCHES_NEW_MD5 )
foreach(_PATCH ${PKG_PATCHES})
file(MD5 ${_PATCH} _PF_MD5)
set(${pkg_name}_PATCHES_NEW_MD5 "${${pkg_name}_PATCHES_NEW_MD5},${_PF_MD5}")
endforeach(_PATCH)

if (NOT ${pkg_name}_PATCHES_MD5 STREQUAL ${pkg_name}_PATCHES_NEW_MD5)
set(${pkg_name}_PATCHES ${PKG_PATCHES})
file(REMOVE_RECURSE "${_MS_LIB_CACHE}/${pkg_name}-subbuild")
file(WRITE ${_MS_LIB_CACHE}/${pkg_name}_patch.md5 ${${pkg_name}_PATCHES_NEW_MD5})
message("patches changed : ${${pkg_name}_PATCHES_NEW_MD5}")
endif ()
endif ()
endfunction()

set(GE_FIND_NO_DEFAULT_PATH NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_SYSTEM_ENVIRONMENT_PATH
NO_CMAKE_BUILDS_PATH NO_CMAKE_PACKAGE_REGISTRY NO_CMAKE_SYSTEM_PATH
NO_CMAKE_SYSTEM_PACKAGE_REGISTRY)
set(GE_FIND_NO_DEFAULT_PATH ${GE_FIND_NO_DEFAULT_PATH} PARENT_SCOPE)

function(graphengine_add_pkg pkg_name )
set(options )
set(oneValueArgs URL MD5 GIT_REPOSITORY GIT_TAG VER EXE DIR HEAD_ONLY CMAKE_PATH)
set(multiValueArgs CMAKE_OPTION LIBS PRE_CONFIGURE_COMMAND CONFIGURE_COMMAND BUILD_OPTION INSTALL_INCS INSTALL_LIBS PATCHES)
cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} )

if (NOT PKG_CMAKE_PATH)
set(PKG_CMAKE_PATH ..)
endif ()

set(__FIND_PKG_NAME ${pkg_name})
string(TOLOWER ${pkg_name} pkg_name)
message("pkg name:${__FIND_PKG_NAME},${pkg_name}")

set(${pkg_name}_PATCHES_HASH )
foreach(_PATCH ${PKG_PATCHES})
file(MD5 ${_PATCH} _PF_MD5)
set(${pkg_name}_PATCHES_HASH "${${pkg_name}_PATCHES_HASH},${_PF_MD5}")
endforeach(_PATCH)

# check options
set(${pkg_name}_CONFIG_TXT
"${CMAKE_CXX_COMPILER_VERSION}-${CMAKE_C_COMPILER_VERSION}
${ARGN} - ${${pkg_name}_USE_STATIC_LIBS}- ${${pkg_name}_PATCHES_HASH}
${${pkg_name}_CXXFLAGS}--${${pkg_name}_CFLAGS}--${${pkg_name}_LDFLAGS}")
string(REPLACE ";" "-" ${pkg_name}_CONFIG_TXT ${${pkg_name}_CONFIG_TXT})
string(MD5 ${pkg_name}_CONFIG_HASH ${${pkg_name}_CONFIG_TXT})

message("${pkg_name} config hash: ${${pkg_name}_CONFIG_HASH}")

set(${pkg_name}_BASE_DIR ${_MS_LIB_CACHE}/${pkg_name}_${${pkg_name}_CONFIG_HASH})
set(${pkg_name}_DIRPATH ${${pkg_name}_BASE_DIR} CACHE STRING INTERNAL)

if(EXISTS ${${pkg_name}_BASE_DIR}/options.txt AND PKG_HEAD_ONLY)
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE)
add_library(${pkg_name} INTERFACE)
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})
return()
endif ()

if(NOT PKG_EXE)
set(PKG_EXE 0)
endif()

set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR})
set(${__FIND_PKG_NAME}_ROOT ${${pkg_name}_BASE_DIR} PARENT_SCOPE)

if (PKG_LIBS)
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS})
if(${pkg_name}_LIBS)
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
message("Found libs: ${${pkg_name}_LIBS}")
return()
endif()
elseif(NOT PKG_HEAD_ONLY)
find_package(${__FIND_PKG_NAME} ${PKG_VER} ${GE_FIND_NO_DEFAULT_PATH})
if (${__FIND_PKG_NAME}_FOUND)
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
message("Found pkg: ${__FIND_PKG_NAME}")
return()
endif ()
endif ()

if (NOT PKG_DIR)
if (PKG_GIT_REPOSITORY)
__download_pkg_with_git(${pkg_name} ${PKG_GIT_REPOSITORY} ${PKG_GIT_TAG} ${PKG_MD5})
else()
__download_pkg(${pkg_name} ${PKG_URL} ${PKG_MD5})
endif()
else()
set(${pkg_name}_SOURCE_DIR ${PKG_DIR})
endif ()
file(WRITE ${${pkg_name}_BASE_DIR}/options.txt ${${pkg_name}_CONFIG_TXT})
message("${pkg_name}_SOURCE_DIR : ${${pkg_name}_SOURCE_DIR}")

foreach(_PATCH_FILE ${PKG_PATCHES})
message("patching ${${pkg_name}_SOURCE_DIR} -p1 < ${_PATCH_FILE}")
execute_process(COMMAND patch -p1 INPUT_FILE ${_PATCH_FILE}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}
RESULT_VARIABLE Result)
if(NOT Result EQUAL "0")
message(FATAL_ERROR "Failed patch: ${_PATCH_FILE}")
endif()
endforeach(_PATCH_FILE)

file(LOCK ${${pkg_name}_BASE_DIR} DIRECTORY GUARD FUNCTION RESULT_VARIABLE ${pkg_name}_LOCK_RET TIMEOUT 600)
if(NOT ${pkg_name}_LOCK_RET EQUAL "0")
message(FATAL_ERROR "error! when try lock ${${pkg_name}_BASE_DIR} : ${${pkg_name}_LOCK_RET}")
endif()

if(${pkg_name}_SOURCE_DIR)
if (PKG_HEAD_ONLY)
file(GLOB ${pkg_name}_SOURCE_SUBDIRS ${${pkg_name}_SOURCE_DIR}/*)
file(COPY ${${pkg_name}_SOURCE_SUBDIRS} DESTINATION ${${pkg_name}_BASE_DIR})
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/${PKG_HEAD_ONLY} PARENT_SCOPE)
add_library(${pkg_name} INTERFACE)
target_include_directories(${pkg_name} INTERFACE ${${pkg_name}_INC})

elseif (PKG_CMAKE_OPTION)
# in cmake
file(MAKE_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
if (${pkg_name}_CFLAGS)
set(${pkg_name}_CMAKE_CFLAGS "-DCMAKE_C_FLAGS=${${pkg_name}_CFLAGS}")
endif ()
if (${pkg_name}_CXXFLAGS)
set(${pkg_name}_CMAKE_CXXFLAGS "-DCMAKE_CXX_FLAGS=${${pkg_name}_CXXFLAGS}")
endif ()

if (${pkg_name}_LDFLAGS)
if (${pkg_name}_USE_STATIC_LIBS)
#set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_STATIC_LINKER_FLAGS=${${pkg_name}_LDFLAGS}")
else()
set(${pkg_name}_CMAKE_LDFLAGS "-DCMAKE_SHARED_LINKER_FLAGS=${${pkg_name}_LDFLAGS}")
endif ()
endif ()

__exec_cmd(COMMAND ${CMAKE_COMMAND} ${PKG_CMAKE_OPTION} -G ${CMAKE_GENERATOR}
${${pkg_name}_CMAKE_CFLAGS} ${${pkg_name}_CMAKE_CXXFLAGS} ${${pkg_name}_CMAKE_LDFLAGS}
-DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ${PKG_CMAKE_PATH}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)

__exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j${THNUM}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)

else()
if (${pkg_name}_CFLAGS)
set(${pkg_name}_MAKE_CFLAGS "CFLAGS=${${pkg_name}_CFLAGS}")
endif ()
if (${pkg_name}_CXXFLAGS)
set(${pkg_name}_MAKE_CXXFLAGS "CXXFLAGS=${${pkg_name}_CXXFLAGS}")
endif ()
if (${pkg_name}_LDFLAGS)
set(${pkg_name}_MAKE_LDFLAGS "LDFLAGS=${${pkg_name}_LDFLAGS}")
endif ()
# in configure && make
if (PKG_PRE_CONFIGURE_COMMAND)
__exec_cmd(COMMAND ${PKG_PRE_CONFIGURE_COMMAND}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
endif ()

if (PKG_CONFIGURE_COMMAND)
__exec_cmd(COMMAND ${PKG_CONFIGURE_COMMAND}
${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS}
--prefix=${${pkg_name}_BASE_DIR}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
endif ()
set(${pkg_name}_BUILD_OPTION ${PKG_BUILD_OPTION})
if (NOT PKG_CONFIGURE_COMMAND)
set(${pkg_name}_BUILD_OPTION ${${pkg_name}_BUILD_OPTION}
${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS})
endif ()
# build
__exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j${THNUM}
WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})

if (PKG_INSTALL_INCS OR PKG_INSTALL_LIBS)
file(GLOB ${pkg_name}_INSTALL_INCS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_INCS})
file(GLOB ${pkg_name}_INSTALL_LIBS ${${pkg_name}_SOURCE_DIR}/${PKG_INSTALL_LIBS})
file(COPY ${${pkg_name}_INSTALL_INCS} DESTINATION ${${pkg_name}_BASE_DIR}/include)
file(COPY ${${pkg_name}_INSTALL_LIBS} DESTINATION ${${pkg_name}_BASE_DIR}/lib)
else()
__exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} install WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
endif ()
endif ()
endif()

if (PKG_LIBS)
__find_pkg_then_add_target(${pkg_name} ${PKG_EXE} ${PKG_LIBS})
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
if(NOT ${pkg_name}_LIBS)
message(FATAL_ERROR "Can not find pkg: ${pkg_name}")
endif()
else()
find_package(${__FIND_PKG_NAME} ${PKG_VER} QUIET)
if (${__FIND_PKG_NAME}_FOUND)
set(${pkg_name}_INC ${${pkg_name}_BASE_DIR}/include PARENT_SCOPE)
message("Found pkg: ${${__FIND_PKG_NAME}_LIBRARIES}")
return()
endif ()
endif ()
endfunction()
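
A minimal usage sketch, for orientation (hedged: the helper's own name is not visible in this hunk and is written here as graphengine_add_pkg; URL and MD5 are assumed keywords for the download step, which sits above this excerpt, and the URL, checksum and patch path are placeholders — VER, PATCHES, LIBS and CMAKE_OPTION are the keywords actually parsed in the code above):

    graphengine_add_pkg(securec
            VER 1.0.0
            URL https://example.com/securec-v1.0.0.tar.gz
            MD5 0123456789abcdef0123456789abcdef
            PATCHES ${GE_CODE_DIR}/third_party/patch/securec/securec.patch001
            LIBS c_sec
            CMAKE_OPTION -DCMAKE_BUILD_TYPE=Release)

    # On return, ${securec_INC} names the installed include directory
    # (set via PARENT_SCOPE above) and can be added to a consumer's include paths.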

+0 -52 cmake/intf_pub_android.cmake

@@ -1,52 +0,0 @@

add_library(intf_pub INTERFACE)

target_compile_options(intf_pub INTERFACE
-Wall
-fPIC
-fstack-protector-strong
)
target_compile_definitions(intf_pub INTERFACE
$<$<STREQUAL:${PRODUCT_SIDE},host>:_GLIBCXX_USE_CXX11_ABI=0>
$<$<CONFIG:Release>:CFG_BUILD_NDEBUG>
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro
-Wl,-z,now
-Wl,-z,noexecstack
$<$<CONFIG:Release>:-Wl,--build-id=none>
)
target_link_directories(intf_pub INTERFACE
)

add_library(intf_ccec INTERFACE)
target_compile_options(intf_ccec INTERFACE
-mcpu=cortex-a73
--target=aarch64-linux-android29
--sysroot=${HCC_PATH}/../sysroot
-L${HCC_PATH}/../lib/gcc/aarch64-linux-android/4.9.x
-Wall
-fPIC
-fstack-protector-strong
)
target_compile_definitions(intf_ccec INTERFACE
$<$<STREQUAL:${PRODUCT_SIDE},host>:_GLIBCXX_USE_CXX11_ABI=0>
$<$<CONFIG:Release>:CFG_BUILD_NDEBUG>
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
)

target_link_options(intf_ccec INTERFACE
-mcpu=cortex-a73
--target=aarch64-linux-android29
--sysroot=${HCC_PATH}/../sysroot
-L${HCC_PATH}/../lib/gcc/aarch64-linux-android/4.9.x
-Wl,-cce-host-android
-Wl,-z,relro
-Wl,-z,now
-Wl,-z,noexecstack
$<$<CONFIG:Release>:-Wl,--build-id=none>
)


+0 -33 cmake/intf_pub_linux.cmake

@@ -1,33 +0,0 @@
if (HAVE_PUB)
return()
endif()

add_library(intf_pub INTERFACE)

target_compile_options(intf_pub INTERFACE
-Wall
-fPIC
$<IF:$<STREQUAL:${CMAKE_SYSTEM_NAME},centos>,-fstack-protector-all,-fstack-protector-strong>
$<$<COMPILE_LANGUAGE:CXX>:-std=c++11>
)
target_compile_definitions(intf_pub INTERFACE
_GLIBCXX_USE_CXX11_ABI=0
$<$<CONFIG:Release>:CFG_BUILD_NDEBUG>
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro
-Wl,-z,now
-Wl,-z,noexecstack
$<$<CONFIG:Release>:-Wl,--build-id=none>
)
target_link_directories(intf_pub INTERFACE
)
target_link_libraries(intf_pub INTERFACE
-lpthread
)

#set(HAVE_PUB TRUE CACHE BOOL "pub add")
set(HAVE_PUB TRUE)
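
These options bind to the INTERFACE target only, so a consumer inherits them by linking it (sketch; my_engine is a hypothetical target):

    include(cmake/intf_pub_linux.cmake)
    add_library(my_engine SHARED my_engine.cc)
    target_link_libraries(my_engine PRIVATE $<BUILD_INTERFACE:intf_pub>)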

+0 -24 cmake/intf_pub_windows.cmake

@@ -1,24 +0,0 @@

add_library(intf_pub INTERFACE)

target_compile_options(intf_pub INTERFACE
-Wall
-fPIC
$<IF:$<STREQUAL:${OS_TYPE},centos>,-fstack-protector-all,-fstack-protector-strong>
$<$<COMPILE_LANGUAGE:CXX>:-std=c++11>
)
target_compile_definitions(intf_pub INTERFACE
$<$<STREQUAL:${PRODUCT_SIDE},host>:_GLIBCXX_USE_CXX11_ABI=0>
OS_TYPE=WIN64
WIN64=1
LINUX=0
$<$<CONFIG:Release>:CFG_BUILD_NDEBUG>
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
)
target_link_options(intf_pub INTERFACE
$<$<CONFIG:Release>:-Wl,--build-id=none>
)
target_link_directories(intf_pub INTERFACE
)
target_link_libraries(intf_pub INTERFACE
)

+0 -910 ge/CMakeLists.txt

@@ -1,910 +0,0 @@
add_subdirectory(common)
add_subdirectory(plugin/engine)
add_subdirectory(graph/build/memory)
add_subdirectory(ge_local_engine)
add_subdirectory(host_cpu_engine)
add_subdirectory(executor)
add_subdirectory(offline)

set(PROTO_LIST
"${METADEF_DIR}/proto/fusion_model.proto"
"${GE_CODE_DIR}/ge/proto/optimizer_priority.proto"
)

set(PROTO_CLIENT_LIST
"${METADEF_DIR}/proto/ge_api.proto"
)

set(PROTO_HEADER_LIST
"${METADEF_DIR}/proto/om.proto"
"${METADEF_DIR}/proto/task.proto"
"${METADEF_DIR}/proto/insert_op.proto"
"${METADEF_DIR}/proto/ge_ir.proto"
"${METADEF_DIR}/proto/fwk_adapter.proto"
"${METADEF_DIR}/proto/op_mapping_info.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
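# protobuf_generate (a GE cmake helper) compiles each listed .proto into C++
# sources and headers under ${CMAKE_BINARY_DIR}/proto/ge; the generated sources
# are built into the targets below and that directory is on their include paths.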

############ libge_runner.so ############
set(TRAIN_SRC_LIST
"common/formats/format_transfers/datatype_transfer.cc"
"common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc"
"common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc"
"common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc"
"common/formats/format_transfers/format_transfer_fractal_nz.cc"
"common/formats/format_transfers/format_transfer_fractal_z.cc"
"common/formats/format_transfers/format_transfer_fractal_zz.cc"
"common/formats/format_transfers/format_transfer_fracz_hwcn.cc"
"common/formats/format_transfers/format_transfer_fracz_nchw.cc"
"common/formats/format_transfers/format_transfer_fracz_nhwc.cc"
"common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc"
"common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc"
"common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc"
"common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc"
"common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc"
"common/formats/format_transfers/format_transfer_transpose.cc"
"common/formats/formats.cc"
"common/formats/utils/formats_trans_utils.cc"
"common/fp16_t.cc"
"common/ge/plugin_manager.cc"
"common/ge/op_tiling_manager.cc"
"common/helper/model_cache_helper.cc"
"common/profiling/profiling_manager.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_properties.cc"
"common/dump/dump_op.cc"
"engine_manager/dnnengine_manager.cc"
"ge_local_engine/engine/host_cpu_engine.cc"
"generator/ge_generator.cc"
"generator/generator_api.cc"
"graph/build/graph_builder.cc"
"graph/build/label_allocator.cc"
"graph/build/logical_stream_allocator.cc"
"graph/build/model_builder.cc"
"graph/build/run_context.cc"
"graph/build/stream_allocator.cc"
"graph/build/stream_graph_optimizer.cc"
"graph/build/task_generator.cc"
"graph/common/bcast.cc"
"graph/common/local_context.cc"
"graph/common/omg_util.cc"
"graph/common/transop_util.cc"
"graph/execute/graph_execute.cc"
"graph/label/case_label_maker.cc"
"graph/label/if_label_maker.cc"
"graph/label/label_maker.cc"
"graph/label/partitioned_call_label_maker.cc"
"graph/label/while_label_maker.cc"
"graph/load/graph_loader.cc"
"graph/load/new_model_manager/cpu_queue_schedule.cc"
"graph/load/new_model_manager/data_dumper.cc"
"graph/load/new_model_manager/data_inputer.cc"
"graph/load/new_model_manager/davinci_model.cc"
"graph/load/new_model_manager/davinci_model_parser.cc"
"graph/load/new_model_manager/model_manager.cc"
"graph/load/new_model_manager/model_utils.cc"
"graph/load/new_model_manager/aipp_utils.cc"
"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"graph/load/new_model_manager/task_info/event_record_task_info.cc"
"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/new_model_manager/task_info/hccl_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_task_info.cc"
"graph/load/new_model_manager/task_info/label_set_task_info.cc"
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/new_model_manager/tbe_handle_store.cc"
"graph/load/new_model_manager/zero_copy_task.cc"
"graph/load/new_model_manager/zero_copy_offset.cc"
"graph/manager/graph_context.cc"
"graph/manager/graph_manager.cc"
"graph/manager/graph_manager_utils.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
"graph/manager/memory_api.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/trans_var_data_utils.cc"
"graph/manager/util/debug.cc"
"graph/manager/util/hcom_util.cc"
"graph/manager/util/rt_context_util.cc"
"graph/manager/util/variable_accelerate_ctrl.cc"
"graph/optimize/graph_optimize.cc"
"graph/optimize/mem_rw_conflict_optimize.cc"
"graph/optimize/summary_optimize.cc"
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/passes/addn_pass.cc"
"graph/passes/aicpu_constant_folding_pass.cc"
"graph/passes/assert_pass.cc"
"graph/passes/input_output_connection_identify_pass.cc"
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/partition/dynamic_shape_partition.cc"
"graph/partition/stage_partition.cc"
"graph/passes/base_pass.cc"
"graph/passes/bitcast_pass.cc"
"graph/passes/cast_remove_pass.cc"
"graph/passes/cast_translate_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/transop_symmetry_elimination_pass.cc"
"graph/passes/compile_nodes_pass.cc"
"graph/passes/constant_folding_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/control_trigger_pass.cc"
"graph/passes/dimension_adjust_pass.cc"
"graph/passes/dimension_compute_pass.cc"
"graph/passes/dropout_pass.cc"
"graph/passes/hccl_group_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/add_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
"host_kernels/cast_kernel.cc"
"host_kernels/concat_offset_kernel.cc"
"host_kernels/concat_v2_kernel.cc"
"host_kernels/dynamic_stitch_kernel.cc"
"host_kernels/identity_kernel.cc"
"host_kernels/empty_kernel.cc"
"host_kernels/expanddims_kernel.cc"
"host_kernels/fill_kernel.cc"
"host_kernels/floordiv_kernel.cc"
"host_kernels/floormod_kernel.cc"
"host_kernels/gather_v2_kernel.cc"
"host_kernels/greater_kernel.cc"
"host_kernels/kernel_utils.cc"
"host_kernels/maximum_kernel.cc"
"host_kernels/mul_kernel.cc"
"host_kernels/pack_kernel.cc"
"host_kernels/permute_kernel.cc"
"host_kernels/range_kernel.cc"
"host_kernels/rank_kernel.cc"
"host_kernels/reduce_prod_kernel.cc"
"host_kernels/reshape_kernel.cc"
"host_kernels/rsqrt_kernel.cc"
"host_kernels/shape_kernel.cc"
"host_kernels/shape_n_kernel.cc"
"host_kernels/size_kernel.cc"
"host_kernels/slice_d_kernel.cc"
"host_kernels/slice_kernel.cc"
"host_kernels/squeeze_kernel.cc"
"host_kernels/unsqueeze_kernel.cc"
"host_kernels/ssd_prior_box_kernel.cc"
"host_kernels/strided_slice_kernel.cc"
"host_kernels/sub_kernel.cc"
"host_kernels/transdata_kernel.cc"
"host_kernels/unpack_kernel.cc"
"graph/passes/folding_pass.cc"
"graph/passes/get_original_format_pass.cc"
"graph/passes/guarantee_const_pass.cc"
"graph/passes/hccl_memcpy_pass.cc"
"graph/passes/identity_pass.cc"
"graph/passes/ref_identity_delete_op_pass.cc"
"graph/passes/infershape_pass.cc"
"graph/passes/isolated_op_remove_pass.cc"
"graph/passes/iterator_op_pass.cc"
"graph/passes/link_gen_mask_nodes_pass.cc"
"graph/passes/merge_pass.cc"
"graph/passes/multi_batch_pass.cc"
"graph/passes/multi_batch_clone_pass.cc"
"graph/passes/subexpression_migration_pass.cc"
"graph/passes/subgraph_const_migration_pass.cc"
"graph/passes/unused_args_clean_pass.cc"
"graph/passes/net_output_pass.cc"
"graph/passes/next_iteration_pass.cc"
"graph/passes/no_use_reshape_remove_pass.cc"
"graph/passes/pass_manager.cc"
"graph/passes/pass_utils.cc"
"graph/passes/permute_pass.cc"
"graph/passes/placeholder_with_default_pass.cc"
"graph/passes/prevent_gradient_pass.cc"
"graph/passes/print_op_pass.cc"
"graph/passes/prune_pass.cc"
"graph/passes/ctrl_edge_transfer_pass.cc"
"graph/passes/replace_with_empty_const_pass.cc"
"graph/passes/reshape_remove_pass.cc"
"graph/passes/reshape_recovery_pass.cc"
"graph/passes/resource_pair_add_control_pass.cc"
"graph/passes/resource_pair_remove_control_pass.cc"
"graph/passes/same_transdata_breadth_fusion_pass.cc"
"graph/passes/save_pass.cc"
"graph/passes/shape_operate_op_remove_pass.cc"
"graph/passes/snapshot_pass.cc"
"graph/passes/stop_gradient_pass.cc"
"graph/passes/subgraph_pass.cc"
"graph/passes/data_pass.cc"
"graph/passes/switch_data_edges_bypass.cc"
"graph/passes/switch_logic_remove_pass.cc"
"graph/passes/merge_to_stream_merge_pass.cc"
"graph/passes/switch_to_stream_switch_pass.cc"
"graph/passes/attach_stream_label_pass.cc"
"graph/passes/switch_dead_branch_elimination.cc"
"graph/passes/replace_transshape_pass.cc"
"graph/passes/transop_breadth_fusion_pass.cc"
"graph/passes/transop_depth_fusion_pass.cc"
"graph/passes/transop_nearby_allreduce_fusion_pass.cc"
"graph/passes/transop_without_reshape_fusion_pass.cc"
"graph/passes/transpose_transdata_pass.cc"
"graph/passes/unused_const_pass.cc"
"graph/passes/unused_op_remove_pass.cc"
"graph/passes/var_is_initialized_op_pass.cc"
"graph/passes/parallel_concat_start_op_pass.cc"
"graph/passes/cond_pass.cc"
"graph/passes/cond_remove_pass.cc"
"graph/passes/for_pass.cc"
"graph/passes/variable_format_pass.cc"
"graph/passes/variable_op_pass.cc"
"graph/passes/variable_prepare_op_pass.cc"
"graph/passes/variable_ref_delete_op_pass.cc"
"graph/passes/variable_ref_useless_control_out_delete_pass.cc"
"graph/passes/end_of_sequence_add_control_pass.cc"
"graph/passes/memcpy_addr_async_pass.cc"
"graph/passes/set_input_output_offset_pass.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"graph/preprocess/multi_batch_options.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"init/gelib.cc"
"model/ge_model.cc"
"model/ge_root_model.cc"
"omm/csa_interact.cc"
"opskernel_manager/ops_kernel_manager.cc"
"opskernel_manager/ops_kernel_builder_manager.cc"
"session/inner_session.cc"
"session/session_manager.cc"
"single_op/single_op.cc"
"single_op/single_op_manager.cc"
"single_op/single_op_model.cc"
"single_op/stream_resource.cc"
"single_op/task/build_task_utils.cc"
"single_op/task/op_task.cc"
"single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc"
"hybrid/common/tensor_value.cc"
"hybrid/common/npu_memory_allocator.cc"
"hybrid/executor/rt_callback_manager.cc"
"hybrid/executor/node_state.cc"
"hybrid/executor/node_done_manager.cc"
"hybrid/executor/hybrid_profiler.cc"
"hybrid/executor/hybrid_model_executor.cc"
"hybrid/executor/hybrid_model_async_executor.cc"
"hybrid/executor/hybrid_execution_context.cc"
"hybrid/executor/subgraph_context.cc"
"hybrid/executor/subgraph_executor.cc"
"hybrid/executor/worker/task_compile_engine.cc"
"hybrid/executor/worker/shape_inference_engine.cc"
"hybrid/executor/worker/execution_engine.cc"
"hybrid/model/hybrid_model.cc"
"hybrid/model/hybrid_model_builder.cc"
"hybrid/model/node_item.cc"
"hybrid/model/graph_item.cc"
"hybrid/node_executor/aicore/aicore_node_executor.cc"
"hybrid/node_executor/aicore/aicore_op_task.cc"
"hybrid/node_executor/aicore/aicore_task_builder.cc"
"hybrid/node_executor/aicore/aicore_task_compiler.cc"
"hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"hybrid/node_executor/aicpu/aicpu_node_executor.cc"
"hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
"hybrid/node_executor/ge_local/ge_local_node_executor.cc"
"hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
"hybrid/node_executor/host_cpu/kernel_factory.cc"
"hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
"hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
"hybrid/node_executor/controlop/control_op_executor.cc"
"hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
"hybrid/node_executor/hccl/hccl_node_executor.cc"
"hybrid/node_executor/rts/rts_node_executor.cc"
"hybrid/node_executor/node_executor.cc"
"hybrid/node_executor/task_context.cc"
"hybrid/hybrid_davinci_model.cc"
"executor/ge_executor.cc"
"client/ge_api.cc"
"client/ge_prof.cc"
"analyzer/analyzer.cc"
)

add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})

target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
DAVINCI_SUPPORT_PROFILING
REUSE_MEMORY=1
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
)

target_compile_options(ge_runner PRIVATE
-O2
)

target_include_directories(ge_runner PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${GE_CODE_DIR}/inc/framework/common
${METADEF_DIR}
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/external
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
#### blue zone ####
${ASCEND_DIR}/driver/include
${ASCEND_DIR}/fwkacllib/include
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_runner
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
msprofiler
-Wl,--no-as-needed
graph
ge_common
protobuf
register
c_sec
slog
mmpa
msprof
runtime
resource
error_manager
ascend_hal_stub
-Wl,--as-needed
json
-lrt
-ldl
)

############ libge_compiler.so ############
set(INFER_SRC_LIST
"graph/manager/trans_var_data_utils.cc"
"omm/csa_interact.cc"
"common/fp16_t.cc"
"common/formats/utils/formats_trans_utils.cc"
"common/formats/format_transfers/datatype_transfer.cc"
"common/formats/format_transfers/format_transfer_transpose.cc"
"common/formats/format_transfers/format_transfer_nchw_nc1hwc0.cc"
"common/formats/format_transfers/format_transfer_fractal_z.cc"
"common/formats/format_transfers/format_transfer_fractal_nz.cc"
"common/formats/format_transfers/format_transfer_fractal_zz.cc"
"common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc"
"common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc"
"common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc"
"common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc"
"common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc"
"common/formats/format_transfers/format_transfer_fracz_nchw.cc"
"common/formats/format_transfers/format_transfer_fracz_nhwc.cc"
"common/formats/format_transfers/format_transfer_fracz_hwcn.cc"
"common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc"
"common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc"
"common/formats/format_transfers/format_transfer_nchw_fz_c04.cc"
"common/formats/formats.cc"
"common/profiling/profiling_manager.cc"
"common/dump/dump_properties.cc"
"common/dump/dump_manager.cc"
"common/dump/dump_op.cc"
"common/dump/dump_server.cc"
"common/helper/model_cache_helper.cc"
"ge_local_engine/engine/host_cpu_engine.cc"
"common/ge/plugin_manager.cc"
"common/ge/op_tiling_manager.cc"
"init/gelib.cc"
"session/inner_session.cc"
"session/session_manager.cc"
"engine_manager/dnnengine_manager.cc"
"opskernel_manager/ops_kernel_manager.cc"
"opskernel_manager/ops_kernel_builder_manager.cc"
"graph/manager/graph_manager.cc"
"graph/manager/graph_manager_utils.cc"
"graph/manager/graph_context.cc"
"graph/preprocess/graph_preprocess.cc"
"graph/preprocess/multi_batch_options.cc"
"graph/preprocess/multi_batch_copy_graph.cc"
"graph/execute/graph_execute.cc"
"graph/load/graph_loader.cc"
"graph/optimize/graph_optimize.cc"
"graph/optimize/mem_rw_conflict_optimize.cc"
"graph/optimize/summary_optimize.cc"
"graph/build/graph_builder.cc"
"graph/partition/engine_place.cc"
"graph/partition/graph_partition.cc"
"graph/partition/dynamic_shape_partition.cc"
"graph/partition/stage_partition.cc"
"generator/ge_generator.cc"
"generator/generator_api.cc"
"graph/manager/graph_var_manager.cc"
"graph/manager/host_mem_manager.cc"
"graph/manager/rdma_pool_allocator.cc"
"graph/manager/graph_mem_allocator.cc"
"graph/manager/graph_caching_allocator.cc"
"model/ge_model.cc"
"model/ge_root_model.cc"
"graph/common/transop_util.cc"
"graph/passes/pass_manager.cc"
"graph/passes/resource_pair_add_control_pass.cc"
"graph/passes/resource_pair_remove_control_pass.cc"
"graph/passes/pass_utils.cc"
"graph/passes/base_pass.cc"
"graph/passes/bitcast_pass.cc"
"graph/passes/constant_folding_pass.cc"
"graph/passes/aicpu_constant_folding_pass.cc"
"graph/passes/reshape_remove_pass.cc"
"graph/passes/reshape_recovery_pass.cc"
"graph/passes/transop_breadth_fusion_pass.cc"
"graph/passes/transop_depth_fusion_pass.cc"
"graph/passes/transop_nearby_allreduce_fusion_pass.cc"
"graph/passes/same_transdata_breadth_fusion_pass.cc"
"graph/passes/transop_without_reshape_fusion_pass.cc"
"graph/passes/compile_nodes_pass.cc"
"graph/passes/variable_prepare_op_pass.cc"
"graph/passes/variable_ref_delete_op_pass.cc"
"graph/passes/variable_ref_useless_control_out_delete_pass.cc"
"graph/passes/subgraph_pass.cc"
"graph/passes/data_pass.cc"
"graph/passes/net_output_pass.cc"
"graph/passes/replace_transshape_pass.cc"
"graph/passes/constant_fuse_same_pass.cc"
"graph/passes/print_op_pass.cc"
"graph/passes/no_use_reshape_remove_pass.cc"
"graph/passes/iterator_op_pass.cc"
"graph/passes/input_output_connection_identify_pass.cc"
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/common/omg_util.cc"
"graph/common/bcast.cc"
"graph/common/local_context.cc"
"graph/passes/dimension_compute_pass.cc"
"graph/passes/dimension_adjust_pass.cc"
"graph/passes/get_original_format_pass.cc"
"graph/passes/shape_operate_op_remove_pass.cc"
"graph/passes/unused_op_remove_pass.cc"
"graph/passes/assert_pass.cc"
"graph/passes/dropout_pass.cc"
"graph/passes/infershape_pass.cc"
"graph/passes/unused_const_pass.cc"
"graph/passes/isolated_op_remove_pass.cc"
"graph/passes/permute_pass.cc"
"graph/passes/ctrl_edge_transfer_pass.cc"
"graph/passes/end_of_sequence_add_control_pass.cc"
"host_kernels/broadcast_gradient_args_kernel.cc"
"host_kernels/greater_kernel.cc"
"host_kernels/gather_v2_kernel.cc"
"host_kernels/maximum_kernel.cc"
"host_kernels/floormod_kernel.cc"
"host_kernels/floordiv_kernel.cc"
"host_kernels/range_kernel.cc"
"host_kernels/shape_kernel.cc"
"host_kernels/size_kernel.cc"
"host_kernels/shape_n_kernel.cc"
"host_kernels/rank_kernel.cc"
"host_kernels/broadcast_args_kernel.cc"
"host_kernels/fill_kernel.cc"
"host_kernels/empty_kernel.cc"
"host_kernels/expanddims_kernel.cc"
"host_kernels/reshape_kernel.cc"
"host_kernels/squeeze_kernel.cc"
"host_kernels/unsqueeze_kernel.cc"
"host_kernels/kernel_utils.cc"
"host_kernels/cast_kernel.cc"
"host_kernels/transdata_kernel.cc"
"host_kernels/unpack_kernel.cc"
"host_kernels/transpose_kernel.cc"
"host_kernels/permute_kernel.cc"
"host_kernels/pack_kernel.cc"
"host_kernels/concat_v2_kernel.cc"
"host_kernels/concat_offset_kernel.cc"
"host_kernels/strided_slice_kernel.cc"
"host_kernels/ssd_prior_box_kernel.cc"
"host_kernels/add_kernel.cc"
"host_kernels/sub_kernel.cc"
"host_kernels/mul_kernel.cc"
"host_kernels/reduce_prod_kernel.cc"
"host_kernels/rsqrt_kernel.cc"
"host_kernels/slice_kernel.cc"
"host_kernels/slice_d_kernel.cc"
"host_kernels/dynamic_stitch_kernel.cc"
"host_kernels/identity_kernel.cc"
"graph/passes/stop_gradient_pass.cc"
"graph/passes/prevent_gradient_pass.cc"
"graph/passes/identity_pass.cc"
"graph/passes/ref_identity_delete_op_pass.cc"
"graph/passes/placeholder_with_default_pass.cc"
"graph/passes/snapshot_pass.cc"
"graph/passes/guarantee_const_pass.cc"
"graph/passes/var_is_initialized_op_pass.cc"
"graph/passes/parallel_concat_start_op_pass.cc"
"graph/passes/folding_pass.cc"
"graph/passes/cast_translate_pass.cc"
"graph/passes/prune_pass.cc"
"graph/passes/merge_to_stream_merge_pass.cc"
"graph/passes/switch_to_stream_switch_pass.cc"
"graph/passes/attach_stream_label_pass.cc"
"graph/passes/multi_batch_pass.cc"
"graph/passes/multi_batch_clone_pass.cc"
"graph/passes/subexpression_migration_pass.cc"
"graph/passes/subgraph_const_migration_pass.cc"
"graph/passes/unused_args_clean_pass.cc"
"graph/passes/next_iteration_pass.cc"
"graph/passes/control_trigger_pass.cc"
"graph/passes/cond_pass.cc"
"graph/passes/cond_remove_pass.cc"
"graph/passes/for_pass.cc"
"graph/passes/enter_pass.cc"
"graph/passes/assign_pass.cc"
"graph/passes/addn_pass.cc"
"graph/passes/common_subexpression_elimination_pass.cc"
"graph/passes/transop_symmetry_elimination_pass.cc"
"graph/passes/save_pass.cc"
"graph/passes/switch_dead_branch_elimination.cc"
"graph/passes/switch_logic_remove_pass.cc"
"graph/passes/switch_data_edges_bypass.cc"
"graph/passes/merge_pass.cc"
"graph/passes/variable_format_pass.cc"
"graph/passes/variable_op_pass.cc"
"graph/passes/cast_remove_pass.cc"
"graph/passes/transpose_transdata_pass.cc"
"graph/passes/hccl_memcpy_pass.cc"
"graph/passes/flow_ctrl_pass.cc"
"graph/passes/global_step_insert_pass.cc"
"graph/passes/link_gen_mask_nodes_pass.cc"
"graph/passes/replace_with_empty_const_pass.cc"
"graph/passes/hccl_group_pass.cc"
"graph/passes/memcpy_addr_async_pass.cc"
"graph/passes/set_input_output_offset_pass.cc"
"graph/manager/model_manager/event_manager.cc"
"graph/manager/util/rt_context_util.cc"
"graph/manager/util/variable_accelerate_ctrl.cc"
"graph/manager/util/debug.cc"
"graph/load/new_model_manager/model_manager.cc"
"graph/load/new_model_manager/data_inputer.cc"
"graph/load/new_model_manager/davinci_model.cc"
"graph/load/new_model_manager/davinci_model_parser.cc"
"graph/load/new_model_manager/model_utils.cc"
"graph/load/new_model_manager/aipp_utils.cc"
"graph/load/new_model_manager/tbe_handle_store.cc"
"graph/load/new_model_manager/cpu_queue_schedule.cc"
"graph/load/new_model_manager/zero_copy_task.cc"
"graph/load/new_model_manager/zero_copy_offset.cc"
"graph/load/new_model_manager/data_dumper.cc"
"graph/load/new_model_manager/task_info/task_info.cc"
"graph/load/new_model_manager/task_info/event_record_task_info.cc"
"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"graph/load/new_model_manager/task_info/kernel_task_info.cc"
"graph/load/new_model_manager/task_info/label_set_task_info.cc"
"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"single_op/task/op_task.cc"
"single_op/task/build_task_utils.cc"
"single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc"
"single_op/single_op.cc"
"single_op/single_op_model.cc"
"single_op/stream_resource.cc"
"single_op/single_op_manager.cc"
"hybrid/hybrid_davinci_model_stub.cc"
"ir_build/ge_ir_build.cc"
"ir_build/atc_ir_common.cc"
"graph/preprocess/insert_op/ge_aipp_op.cc"
"graph/preprocess/insert_op/util_insert_aipp_op.cc"
"hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"graph/build/model_builder.cc"
"graph/build/task_generator.cc"
"graph/build/stream_allocator.cc"
"graph/build/logical_stream_allocator.cc"
"graph/build/stream_graph_optimizer.cc"
"graph/build/run_context.cc"
"graph/build/label_allocator.cc"
"graph/label/label_maker.cc"
"graph/label/if_label_maker.cc"
"graph/label/case_label_maker.cc"
"graph/label/while_label_maker.cc"
"graph/label/partitioned_call_label_maker.cc"
"analyzer/analyzer.cc"
)

add_library(ge_compiler SHARED ${INFER_SRC_LIST} ${PROTO_SRCS})

target_compile_definitions(ge_compiler PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
REUSE_MEMORY=1
FMK_SUPPORT_DUMP
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
)

target_compile_options(ge_compiler PRIVATE
-O2
)

target_include_directories(ge_compiler PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${GE_CODE_DIR}/inc/framework/common
${METADEF_DIR}
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/external
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
#### blue zone ####
${ASCEND_DIR}/driver/include
${ASCEND_DIR}/fwkacllib/include
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_compiler
$<BUILD_INTERFACE:intf_pub>
ge_memory
-Wl,--no-as-needed
graph
ge_common
protobuf
register
c_sec
error_manager
slog
mmpa
runtime_compile
resource
-Wl,--as-needed
json
-lrt
-ldl
)

############ libascendcl.so ############
file(GENERATE OUTPUT ${CMAKE_BINARY_DIR}/dummy.c CONTENT "")
#add_library(dummy_obj OBJECT ${CMAKE_BINARY_DIR}/dummy.c)
#set(DUMMY_OBJ $<TARGET_OBJECTS:dummy_obj>)

file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object)
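# If a prebuilt static libascendcl.a exists, unpack its object files with ar and
# relink them into the shared library defined below; otherwise fall back to the
# empty dummy source so the target can still be built.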

if(EXISTS ${STATIC_ACL_LIB}/libascendcl.a)
execute_process(
COMMAND ar x ${STATIC_ACL_LIB}/libascendcl.a
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object
)
file(GLOB OBJECT_LIST ${CMAKE_CURRENT_BINARY_DIR}/ascendcl_object/*.o)
else()
set(OBJECT_LIST ${CMAKE_BINARY_DIR}/dummy.c)
endif()

add_library(opensrc_ascendcl SHARED
${OBJECT_LIST}
)
target_compile_options(opensrc_ascendcl PRIVATE
-O2
-fvisibility=hidden
)
target_link_options(opensrc_ascendcl PRIVATE
-rdynamic
-Wl,--allow-multiple-definition
-Wl,-z,muldefs
-Wl,-Bsymbolic
-Wl,--exclude-libs,ALL
)
target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive
ge_executor
ascendcl_static
ge_common_static
graph_static
protobuf_static
register_static
error_manager_static
adump_server
msprofiler
-Wl,--no-whole-archive
-Wl,--no-as-needed
c_sec
runtime
mmpa
slog
msprof
ascend_hal_stub
-Wl,--as-needed
-ldl
json
)

set_target_properties(opensrc_ascendcl PROPERTIES
OUTPUT_NAME ascendcl
)

##################################################################
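# Generate stub sources for the public GE APIs: gen_stubapi.py scans the headers
# under inc/external and emits stub .cc files that back the stub libraries below.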
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc
COMMAND echo "Generating stub files."
&& ${HI_PYTHON} ${CMAKE_CURRENT_LIST_DIR}/stub/gen_stubapi.py ${GE_CODE_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR}
&& mv ge_ir_build.cc stub_ge_ir_build.cc
&& mv ge_api.cc stub_ge_api.cc
&& mv ge_prof.cc stub_ge_prof.cc
&& echo "Generating stub files end."
#WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
#DEPENDS stub/gen_stubapi.py ${TOP_DIR}/inc/external ${CMAKE_CURRENT_BINARY_DIR}
)

add_custom_target(ge_stub
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/stub_ge_ir_build.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_api.cc
${CMAKE_CURRENT_BINARY_DIR}/stub_ge_prof.cc
)

##################################################################
############ stub/libge_compiler.so ############
add_library(atc_stub_ge_compiler SHARED
stub_ge_ir_build.cc
)

add_dependencies(atc_stub_ge_compiler ge_stub)

target_link_libraries(atc_stub_ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
)

set_target_properties(atc_stub_ge_compiler PROPERTIES
OUTPUT_NAME ge_compiler
LIBRARY_OUTPUT_DIRECTORY atc_stub
)

target_include_directories(atc_stub_ge_compiler PRIVATE
${GE_CODE_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/framework
${GE_CODE_DIR}/inc/framework/common
${GE_CODE_DIR}/inc/external
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
#### yellow zone ####
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
#### blue zone ####
${ASCEND_DIR}/driver/include
${ASCEND_DIR}/fwkacllib/include
)

############ stub/libge_runner.so ############
add_library(fwk_stub_ge_runner SHARED
stub_ge_api.cc
stub_ge_prof.cc
)

add_dependencies(fwk_stub_ge_runner ge_stub)

target_link_libraries(fwk_stub_ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
)

set_target_properties(fwk_stub_ge_runner PROPERTIES
OUTPUT_NAME ge_runner
LIBRARY_OUTPUT_DIRECTORY fwk_stub
)

target_include_directories(fwk_stub_ge_runner PRIVATE
${GE_CODE_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/analyzer
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${GE_CODE_DIR}/inc/framework/common
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
#### yellow zone ####
${GE_CODE_DIR}/../inc/cce
${GE_CODE_DIR}/../toolchain/ide/ide-daemon/external
#### blue zone ####
${ASCEND_DIR}/driver/include
${ASCEND_DIR}/fwkacllib/include
)

###############################################################
add_custom_target(
engine_conf.json ALL
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json
)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json
COMMAND cp ${CMAKE_CURRENT_LIST_DIR}/engine_manager/engine_conf.json ${CMAKE_CURRENT_BINARY_DIR}/
)

###############################################################
add_custom_target(
optimizer_priority.pbtxt ALL
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt
)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt
COMMAND cp ${CMAKE_CURRENT_LIST_DIR}/opskernel_manager/optimizer_priority.pbtxt ${CMAKE_CURRENT_BINARY_DIR}/
)

###############################################################

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS ge_runner ge_compiler opensrc_ascendcl OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

install(TARGETS atc_stub_ge_compiler fwk_stub_ge_runner OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/stub
)

install(FILES
${CMAKE_CURRENT_BINARY_DIR}/engine_conf.json
${CMAKE_CURRENT_BINARY_DIR}/optimizer_priority.pbtxt OPTIONAL
DESTINATION ${INSTALL_LIBRARY_DIR}
)

+0 -0 ge/README.md


+0 -171 ge/common/CMakeLists.txt

@@ -1,171 +0,0 @@
set(PROTO_LIST
"${METADEF_DIR}/proto/om.proto"
"${METADEF_DIR}/proto/ge_ir.proto"
"${METADEF_DIR}/proto/insert_op.proto"
"${METADEF_DIR}/proto/task.proto"
"${METADEF_DIR}/proto/tensorflow/attr_value.proto"
"${METADEF_DIR}/proto/tensorflow/function.proto"
"${METADEF_DIR}/proto/tensorflow/graph.proto"
"${METADEF_DIR}/proto/tensorflow/node_def.proto"
"${METADEF_DIR}/proto/tensorflow/op_def.proto"
"${METADEF_DIR}/proto/tensorflow/resource_handle.proto"
"${METADEF_DIR}/proto/tensorflow/tensor.proto"
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
"${METADEF_DIR}/proto/tensorflow/types.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRC_LIST
"context/ctx.cc"
"model_saver.cc"
"ge/datatype_util.cc"
"helper/om_file_helper.cc"
"helper/model_helper.cc"
"../model/ge_model.cc"
"auth/file_saver.cc"
"fp16_t.cc"
"math/fp16_math.cc"
"debug/memory_dumper.cc"
"formats/utils/formats_trans_utils.cc"
"dump/dump_properties.cc"
"formats/format_transfers/datatype_transfer.cc"
"formats/format_transfers/format_transfer_transpose.cc"
"formats/format_transfers/format_transfer_nchw_nc1hwc0.cc"
"formats/format_transfers/format_transfer_fractal_z.cc"
"formats/format_transfers/format_transfer_fractal_nz.cc"
"formats/format_transfers/format_transfer_fractal_zz.cc"
"formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc"
"formats/format_transfers/format_transfer_nc1hwc0_nchw.cc"
"formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc"
"formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc"
"formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc"
"formats/format_transfers/format_transfer_fracz_nchw.cc"
"formats/format_transfers/format_transfer_fracz_nhwc.cc"
"formats/format_transfers/format_transfer_fracz_hwcn.cc"
"formats/format_transfers/format_transfer_dhwcn_fracz3D.cc"
"formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc"
"formats/format_transfers/format_transfer_nchw_fz_c04.cc"
"formats/formats.cc"
"ge_format_util.cc"
"fmk_error_codes.cc"
"util.cc"
"properties_manager.cc"
"types.cc"
"model_parser/base.cc"
"kernel_store.cc"
"tbe_kernel_store.cc"
"cust_aicpu_kernel_store.cc"
"op/attr_value_util.cc"
"op/ge_op_utils.cc"
"thread_pool.cc"
"ge/tbe_plugin_manager.cc"
)

############ libge_common.so ############
add_library(ge_common SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_definitions(ge_common PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
HOST_VISIBILITY
FMK_SUPPORT_DUMP
OS_CENTOS
)

target_compile_options(ge_common PRIVATE
-fvisibility=hidden
-O2
-Werror
)

target_include_directories(ge_common PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/common
${GE_CODE_DIR}/ge/common/op
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_DEPEND_DIR}/inc
${GE_DEPEND_DIR}/inc/cce
#### blue zone ####
#${GE_DEPEND_DIR}/include
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_common PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
graph
protobuf
register
c_sec
error_manager
slog
mmpa
-Wl,--as-needed
json
-lrt
-ldl
)

############ libge_common.a ############
add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS})
target_compile_definitions(ge_common_static PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
HOST_VISIBILITY
FMK_SUPPORT_DUMP
OS_CENTOS
)

target_compile_options(ge_common_static PRIVATE
-fvisibility=hidden
-O2
-Werror
)

target_include_directories(ge_common_static PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/ge/common
${GE_CODE_DIR}/ge/common/op
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_DEPEND_DIR}/inc
${GE_DEPEND_DIR}/inc/cce
#### blue zone ####
#${GE_DEPEND_DIR}/include
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(ge_common_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
protobuf
json
c_sec
-lrt
-ldl
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS ge_common OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

+0 -206 ge/common/proto/ge_ir.proto

@@ -1,206 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package ge.proto;

enum DataType
{
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
DT_FLOAT = 1; // float type
DT_FLOAT16 = 2; // fp16 type
DT_INT8 = 3; // int8 type
DT_UINT8 = 4; // uint8 type
DT_INT16 = 5; // int16 type
DT_UINT16 = 6; // uint16 type
DT_INT32 = 7; // int32 type
DT_INT64 = 8; // int64 type
DT_UINT32 = 9; // unsigned int32
DT_UINT64 = 10; // unsigned int64
DT_BOOL = 11; // bool type
DT_DOUBLE = 12; // double type
DT_STRING = 13; // string type
DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */
DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */
DT_COMPLEX64 = 16; // complex64 type
DT_COMPLEX128 = 17; // complex128 type
DT_QINT8 = 18; // qint8 type
DT_QINT16 = 19; // qint16 type
DT_QINT32 = 20; // qint32 type
DT_QUINT8 = 21; // quint8 type
DT_QUINT16 = 22; // quint16 type
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
}

message AttrDef
{
message ListValue
{
enum ListValueType{
VT_LIST_NONE = 0;
VT_LIST_STRING = 1;
VT_LIST_INT = 2;
VT_LIST_FLOAT = 3;
VT_LIST_BOOL = 4;
VT_LIST_BYTES = 5;
VT_LIST_TENSOR_DESC = 6;
VT_LIST_TENSOR = 7;
VT_LIST_GRAPH = 8;
VT_LIST_NAMED_ATTRS = 9;
VT_LIST_DATA_TYPE = 10;
}
repeated bytes s = 2; // "list(string)"
repeated int64 i = 3; // "list(int)"
repeated float f = 4; // "list(float)"
repeated bool b = 5; // "list(bool)"
repeated bytes bt = 7;
repeated TensorDescriptor td = 8;
repeated TensorDef t = 9;
repeated GraphDef g = 10;
repeated NamedAttrs na = 11;
repeated int64 dt = 12; // list ge::DataType

ListValueType val_type = 20;
}

message ListListInt{
message ListInt{
repeated int64 list_i = 1; // list int
}
repeated ListInt list_list_i = 1; // list list int
}

oneof value
{
bytes s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10; // Used to support attr nesting
TensorDescriptor td = 11; // GeTensorDesc type
TensorDef t = 12; // GeTensor type
GraphDef g = 13; // Graph type
ListListInt list_list_int = 14; // List List Int type
int64 dt = 15; // ge::DataType
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs
{
string name = 1;
map<string, AttrDef> attr = 2;
}

// Shape / dimension description, using row-major order
message ShapeDef
{
repeated int64 dim = 1; // Size of each dimension
}

// Multidimensional data description
message TensorDescriptor
{
string name = 1; // Optional parameter, tensor name

DataType dtype = 2; // tensor datatype
ShapeDef shape = 3; // Shape / dimension
string layout = 4; // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"

bool has_out_attr = 9;
int64 size = 10;
int64 weight_size = 11;
bool reuse_input = 12;
bool output_tensor = 13;
string device_type = 14;
bool input_tensor = 15;
int64 real_dim_cnt = 16;
int64 reuse_input_index = 17;
int64 data_offset = 18;
int64 cmps_size = 19;
string cmps_tab = 20;
int64 cmps_tab_offset = 21;

map<string, AttrDef> attr = 5; // Set of extra parameter fields
}

// GeTensor definition
message TensorDef
{
TensorDescriptor desc = 1; // Tensor description
bytes data = 2; // Tensor data
}


// Operator description
message OpDef
{
string name = 1; // name
string type = 2; // type

repeated string input = 5; // inputs as "op_name:index" (source op name + output index)

map<string, AttrDef> attr = 10; // Set of operator parameter fields

bool has_out_attr = 20;
int64 id = 21;
int64 stream_id = 22;
repeated string input_name = 23;
repeated string src_name = 24;
repeated int64 src_index = 25;
repeated string dst_name = 26;
repeated int64 dst_index = 27;
repeated int64 input_i = 28;
repeated int64 output_i = 29;
repeated int64 workspace = 30;
repeated int64 workspace_bytes = 31;
repeated bool is_input_const = 32;
repeated TensorDescriptor input_desc = 33;
repeated TensorDescriptor output_desc = 34;
repeated string subgraph_name = 35;
}

// Graph definition
message GraphDef
{
string name = 1; // name

repeated string input = 4; // Graph input
repeated string output = 5; // Graph output

repeated OpDef op = 6; // List of operators

map<string, AttrDef> attr = 11; // Extended field
}

// model definition
message ModelDef
{
string name = 1; // name
uint32 version = 2; // IR Proto version
string custom_version = 3; // User model version number, passed in by user

repeated GraphDef graph = 7; // Graph definition; graph[0] is the main graph of the model

map<string, AttrDef> attr = 11; // Extended field
}


+0 -152 ge/common/proto/insert_op.proto

@@ -1,152 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message InsertNewOps {
repeated AippOpParams aipp_op = 1;
repeated MultiShapeOpParams multi_shape_op = 2;
}

message AippOpParams {
enum InputFormat {
UNDEFINED = 0;
YUV420SP_U8 = 1;
XRGB8888_U8 = 2;
RGB888_U8 = 3;
YUV400_U8 = 4;
NC1HWC0DI_FP16 = 5;
NC1HWC0DI_S8 = 6;
ARGB8888_U8 = 7;
YUYV_U8 = 8;
YUV422SP_U8 = 9;
AYUV444_U8 = 10;
RAW10 = 11;
RAW12 = 12;
RAW16 = 13;
RAW24 = 14;
RGB16 = 15;
RGB20 = 16;
RGB24 = 17;
RGB8_IR = 18;
RGB16_IR = 19;
RGB24_IR = 20;
}

enum AippMode {
undefined = 0;
static = 1;
dynamic = 2;
}

// AIPP mode, distinguishing static AIPP from dynamic AIPP
AippMode aipp_mode = 1;

// related_input_rank is required; it is an integer >= 0 and <= the number of input Data operators, default 0.
// It selects which model input receives AIPP processing: e.g. if the model has two inputs and the second needs AIPP, set related_input_rank to 1.
uint32 related_input_rank = 2;

// input_edge_idx is optional; it is an integer >= 0.
// It applies different AIPP processing to different outputs of the Data operator; if unset, AIPP applies by default to all output edges of the model input selected by related_input_rank.
// The configured value must be <= the number of output edges of the Data operator.
repeated uint32 input_edge_idx = 3;

// [Begin] Dynamic AIPP parameters; ignored when static AIPP is configured
uint32 max_src_image_size = 4;

// Whether rotation is supported. Off by default; enabling it incurs extra memory and performance overhead.
bool support_rotation = 5;

// [End] Dynamic AIPP parameters


// [Begin] Static AIPP parameters; ignored when dynamic AIPP is configured
InputFormat input_format = 51;
bool csc_switch = 52;
float cpadding_value = 53;
bool rbuv_swap_switch = 54;
bool ax_swap_switch = 55;
bool single_line_mode = 56;

int32 src_image_size_w = 57;
int32 src_image_size_h = 58;

bool crop = 59;
int32 load_start_pos_w = 60;
int32 load_start_pos_h = 61;
int32 crop_size_w = 62;
int32 crop_size_h = 63;

bool resize = 64;
int32 resize_output_w = 65;
int32 resize_output_h = 66;

bool padding = 67;
int32 left_padding_size = 68;
int32 right_padding_size = 69;
int32 top_padding_size = 70;
int32 bottom_padding_size = 71;

int32 mean_chn_0 = 10;
int32 mean_chn_1 = 11;
int32 mean_chn_2 = 12;
int32 mean_chn_3 = 19;
float min_chn_0 = 13;
float min_chn_1 = 14;
float min_chn_2 = 15;
float min_chn_3 = 20;
repeated float var_reci_chn_0 = 16;
repeated float var_reci_chn_1 = 17;
repeated float var_reci_chn_2 = 18;
repeated float var_reci_chn_3 = 21;

repeated int32 matrix_r0c0 = 30;
repeated int32 matrix_r0c1 = 31;
repeated int32 matrix_r0c2 = 32;
repeated int32 matrix_r1c0 = 33;
repeated int32 matrix_r1c1 = 34;
repeated int32 matrix_r1c2 = 35;
repeated int32 matrix_r2c0 = 36;
repeated int32 matrix_r2c1 = 37;
repeated int32 matrix_r2c2 = 38;
repeated int32 output_bias_0 = 39;
repeated int32 output_bias_1 = 40;
repeated int32 output_bias_2 = 41;
repeated int32 input_bias_0 = 42;
repeated int32 input_bias_1 = 43;
repeated int32 input_bias_2 = 44;

// [End] Static AIPP parameters

// The exponent n used in the raw/rgbir-to-f16 transformation.
// The transformation equation is x/(2^n). If set to 0, no transform is performed.
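// Example: with raw_rgbir_to_f16_n = 8, a raw sample x = 512 becomes 512 / 2^8 = 2.0.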
uint32 raw_rgbir_to_f16_n = 45;
}

message MultiShapeOpParams {
enum MultiShapeMode {
batch = 0; // dynamic batch
resolution = 1; // dynamic resolution, reserved for future extension
}

MultiShapeMode mode = 1; // operator mode
uint32 related_input_rank = 2; // which model input the newly added operator is inserted on


repeated uint32 batch_list = 11; // batch_list values; the number of entries is between 2 and 8
}

+0 -401 ge/common/proto/om.proto

@@ -1,401 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

enum TargetType
{
MINI = 0;
TINY = 1;
LITE = 2;
}

// offline model
message ModelDef {
string name = 1;
uint32 version = 2;

uint64 memory_size = 10;
uint32 stream_num = 11;
uint32 event_num = 12;
uint64 weight_size = 13;
uint32 label_num = 15;
repeated OpDef op = 20;
TargetType target_type = 23;

map<string, AttrDef> attr = 30;
};

// operator define
message OpDef {
string name = 1;
string type = 2;

uint32 id = 3;
uint32 stream_id = 4;

repeated string input_name = 5;

repeated string src_name = 8;
repeated int32 src_index = 9;
repeated int64 input = 10;
repeated int64 output = 11;
repeated TensorDescriptor input_desc = 12;
repeated TensorDescriptor output_desc = 13;
repeated WeightDef weights = 14;
repeated string dst_name = 15;
repeated int32 dst_index = 16;

repeated int64 workspace = 20;
repeated uint32 workspace_bytes = 21;

repeated string weight_name = 22;
repeated bool is_input_const = 23;

map<string, AttrDef> attr = 30;

QuantizeFactorParams quantize_factor = 31;

oneof op_params {
// start at 100 here
SendOpParams sender_param = 100;
RecvOpParams receiver_param = 200;
ConvolutionOpParams convolution_param = 300;
PoolingOpParams pooling_param = 400;
EltwiseOpParams eltwise_param = 500;
BatchNormOpParams batchnorm_param = 600;
ScaleOpParams scale_param = 700;
FullConnectionOpParams full_connection_param = 800;
SoftmaxOpParams softmax_param = 900;
ActivationOpParams activation_param = 1000;
ReshapeOpParams reshape_param = 1100;
}
};

message SendOpParams {
uint32 event_id = 1;
};

message RecvOpParams {
uint32 event_id = 1;
};

enum QuantizeScaleType
{
VECTOR_SCALE = 0;
SCALAR_SCALE = 1;
}

enum QuantizeScaleMode
{
NORMAL_MODE = 0;
SQRT_MODE = 1;
}

enum QuantizeAlgorithm
{
NON_OFFSET_ALGO = 0;
HALF_OFFSET_ALGO = 1;
ALL_OFFSET_ALGO = 2;
}
message QuantizeFactor
{
QuantizeScaleMode scale_mode = 1;
bytes scale_value = 2;
int64 scale_offset = 3;
bytes offset_data_value = 4;
int64 offset_data_offset = 5;
bytes offset_weight_value = 6;
int64 offset_weight_offset = 7;
bytes offset_pad_value = 8;
int64 offset_pad_offset = 9;
};

message QuantizeCalcFactor
{
bytes offsetw = 1;
int64 offsetw_offset = 2;
bytes offsetd = 3;
int64 offsetd_offset = 4;
bytes scalereq = 5;
int64 scaledreq_offset = 6;
bytes offsetdnext = 7;
int64 offsetdnext_offset = 8;
}

message QuantizeFactorParams
{
QuantizeAlgorithm quantize_algo = 1;
QuantizeScaleType scale_type = 2;
QuantizeFactor quantize_param = 3;
QuantizeFactor dequantize_param = 4;
QuantizeFactor requantize_param = 5;
QuantizeCalcFactor quantizecalc_param = 6;
};

message ConvolutionOpParams {
int32 mode = 1;
int32 algo = 2;
int32 pad_mode = 3;
uint32 group = 4;
uint32 num_output = 5;

repeated uint32 pad = 10;
repeated uint32 stride = 11;
repeated uint32 dilation = 12;
repeated uint32 kernel = 13;

float alpha = 20;
float beta = 21;

WeightDef filter = 40;
WeightDef bias = 41;

bool relu_flag = 62;
repeated uint32 adj = 70;
repeated uint32 target_shape = 71;
repeated uint32 before_pad = 72;
};

message PoolingOpParams {
int32 mode = 1;
int32 nan_opt = 2;
int32 pad_mode = 3;
bool global_pooling = 4;

repeated uint32 window = 10;
repeated uint32 pad = 11;
repeated uint32 stride = 12;
bool ceil_mode = 13;
int32 data_mode = 14;

float alpha = 20;
float beta = 21;
repeated uint32 before_pad = 22;
};

message EltwiseOpParams {
int32 mode = 1;
repeated float coeff = 2;
float alpha = 3;
float beta = 4;
repeated WeightDef weight = 5;
bool relu_flag = 6;
};

message ActivationOpParams {
int32 mode = 1;
float coef = 2;
float alpha = 3;
float beta = 4;
};

message BatchNormOpParams {
int32 mode = 1;

float alpha = 2;
float beta = 3;
double epsilon = 4; // optional, [default = 1e-5]
bool use_global_stats = 5; // optional, default true (testing mode)
float moving_average_fraction = 6; // optional, [default = .999]

WeightDef estimated_mean = 7;
WeightDef estimated_variance = 8;

WeightDef scale = 9;
WeightDef bias = 10;
};

message ScaleOpParams {
WeightDef scale = 1;
WeightDef bias = 2;
};

message ReshapeOpParams {
float alpha = 1;
float beta = 2;
ShapeDef shape = 3;
int32 axis = 4;
int32 num_axes = 5;
int32 format = 6;
};

message SoftmaxOpParams {
int32 algo = 1;
int32 mode = 2;
float alpha = 3;
float beta = 4;
};

message FullConnectionOpParams {
WeightDef filter = 1;
WeightDef bias = 2;
uint32 num_output = 3;
bool relu_flag = 12;
};

message FlattenOpParams {
float alpha = 1;
float beta = 2;
int32 start_axis = 3;
int32 end_axis = 4;
}

message AddLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message MulLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message AddOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message MulOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message SubOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message BiasAddOpParams {
float alpha = 1;
float beta = 2;

WeightDef bias = 10;
};

message MatMulOpParams {
float alpha = 1;
float beta = 2;
bool transposeX = 3;
bool transposeW = 4;

WeightDef filter = 10;
WeightDef bias = 12;
};

message RsqrtOpParams {
float alpha = 1;
float beta = 2;
};


message WeightDef {
int32 format = 1;
int32 data_type = 2;
ShapeDef shape = 3;
bytes data = 4;
int64 data_offset = 5;
uint32 cmps_size = 6;
bytes cmps_tab = 7;
int64 cmps_tab_offset = 10;
CompressInfo cmps_info = 8;
AllOffsetQuantizeInfo alloffset_quantize_info = 11;
}

message ShapeDef {
repeated int64 dim = 1;
}

enum DeviceType {
NPU = 0; // NPU is used by default.
CPU = 1; // CPU
}

message AllOffsetQuantizeInfo {
float scale = 1;
int32 offset = 2;
}

message TensorDescriptor {
int32 format = 1;
int32 data_type = 2;
repeated int64 dim = 3;
uint32 size = 4;
bool reuse_input = 5;
bool output_tensor = 7;
DeviceType device_type = 8;
bool input_tensor = 9;
uint32 real_dim_cnt = 10;
uint32 reuse_input_index = 11;
AllOffsetQuantizeInfo alloffset_quantize_info = 12;
}

message CompressInfo {
int32 blockRow = 1; // block row
int32 blockCol = 2; // block col
int32 fractalK = 3; // fractal K
int32 fractalN = 4; // fractal N
int32 lastFractalK = 5; // K of last fractal
int32 lastFractalN = 6; // N of last fractal
int32 cubeSize = 7; // cube's length
int32 loadDir = 8; // data load direction: 0 = column load, 1 = row load
}

message AttrDef {
message ListValue {
repeated string s = 2; // "list(string)"
repeated int64 i = 3 [packed = true]; // "list(int)"
repeated float f = 4 [packed = true]; // "list(float)"
repeated bool b = 5 [packed = true]; // "list(bool)"
repeated uint32 u = 6 [packed = true]; // "list(uint)"
repeated bytes bt = 7;
}

oneof value {
string s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
uint32 u = 6; // "uint32"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10;
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs {
string name = 1;
map<string, AttrDef> attr = 2;
}


+ 0
- 170
ge/common/proto/task.proto View File

@@ -1,170 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message ModelTaskDef {
string version = 1;

map<string, string> attr = 9; // Extended field
repeated TaskDef task = 10;

uint64 memory_size = 11;
uint32 stream_num = 12;
uint32 event_num = 13;
uint64 weight_size = 14;

repeated bytes op = 15; // input/output opdef in bytes

uint64 base_addr = 16; // base addr
uint64 weight_addr = 17; // weight addr
uint32 batch_num = 18;
}


message TaskDef {
uint32 id = 1;
uint32 type = 2;

uint32 stream_id = 10;
uint32 event_id = 11;

KernelDef kernel = 20;
KernelExDef kernel_ex = 21;
KernelHcclDef kernel_hccl = 25;
EventExDef event_ex = 26;
LogTimeStampDef log_timestamp = 28;

uint32 label_id = 30;

MemcpyAsyncDef memcpy_async = 31;
StreamSwitchDef stream_switch = 32;
StreamActiveDef stream_active = 33;
bytes private_def = 34;
uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future
StreamSwitchNDef stream_switch_n = 36;

LabelSetDef label_set = 37;
LabelGotoExDef label_goto_ex = 38;
LabelSwitchByIndexDef label_switch_by_index = 39;
}

message KernelDef {
KernelContext context = 1;

string stub_func = 10;
uint32 block_dim = 11;
uint32 args_size = 12;
bytes args = 13;
bytes sm_desc = 14;
bytes flowtable = 15;
string so_name = 16;
string kernel_name = 17;
bytes kernel_ext_info = 18;
uint32 kernel_ext_info_size = 19;
}

message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2; // OP type in CCE
uint32 kernel_func_id = 3;
uint32 op_index = 4; // TE/Custom operator
bool is_flowtable = 5; // Identify whether args is a flowtable structure
bytes args_offset = 6; // args offset information
uint32 args_count = 7; // args count
repeated uint32 origin_op_index = 8;
}


message KernelExDef {
uint32 flags = 1;

uint32 op_index = 4;
uint32 args_size = 12;
bytes args = 13;
bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput
uint32 task_info_size = 15;
bytes kernel_ext_info = 16;
uint32 kernel_ext_info_size = 17;
}


message KernelHcclDef {
uint32 op_index = 8;
string hccl_type = 9;
}


message EventExDef {
uint32 op_index = 1;
uint32 event_type = 2;
}

message LogTimeStampDef {
uint64 logid = 1;
bool notify = 2;
uint32 flat = 3;
}

message MemcpyAsyncDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
uint32 op_index = 6;
}

message StreamSwitchDef {
uint32 op_index = 1;
uint32 true_stream_id = 2;
int64 value = 3;
uint64 value_ptr = 4;
uint32 data_type = 5;
}

message StreamActiveDef {
uint32 op_index = 1;
uint32 active_stream_id = 2;
}

message StreamSwitchNDef {
uint32 op_index = 1;
uint32 size = 2;
repeated int64 target_value = 3;
repeated uint32 true_stream_id = 4;
uint32 element_size = 5;
uint32 data_type = 6;
}

message LabelSetDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelGotoExDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelSwitchByIndexDef {
uint32 op_index = 1;
uint32 label_max = 2;
}
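
A short, hypothetical C++ sketch of assembling a ModelTaskDef with one kernel task via the generated API (task.pb.h is the assumed protoc output):

#include "task.pb.h"  // assumed protoc output

// Hypothetical model task: one stream, one kernel task.
domi::ModelTaskDef BuildModelTask() {
  domi::ModelTaskDef model_task;
  model_task.set_version("1.0");
  model_task.set_stream_num(1);
  domi::TaskDef* task = model_task.add_task();
  task->set_type(0);      // task-type code; the runtime defines the values
  task->set_stream_id(0);
  domi::KernelDef* kernel = task->mutable_kernel();
  kernel->set_stub_func("te_relu_stub");  // illustrative name
  kernel->set_block_dim(1);
  return model_task;
}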

+ 0
- 62
ge/common/proto/tensorflow/attr_value.proto

@@ -1,62 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "AttrValueProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensor.proto";
import "tensor_shape.proto";
import "types.proto";

// Protocol buffer representing the value for an attr used to configure an Op.
// Comment indicates the corresponding attr type. Only the field matching the
// attr type may be filled.
message AttrValue {
// LINT.IfChange
message ListValue {
repeated bytes s = 2; // "list(string)"
repeated int64 i = 3 [packed = true]; // "list(int)"
repeated float f = 4 [packed = true]; // "list(float)"
repeated bool b = 5 [packed = true]; // "list(bool)"
repeated DataType type = 6 [packed = true]; // "list(type)"
repeated TensorShapeProto shape = 7; // "list(shape)"
repeated TensorProto tensor = 8; // "list(tensor)"
repeated NameAttrList func = 9; // "list(attr)"
}
// LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc)

oneof value {
bytes s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
DataType type = 6; // "type"
TensorShapeProto shape = 7; // "shape"
TensorProto tensor = 8; // "tensor"
ListValue list = 1; // any "list(...)"

// "func" represents a function. func.name is a function's name or
// a primitive op's name. func.attr.first is the name of an attr
// defined for that function. func.attr.second is the value for
// that attr in the instantiation.
NameAttrList func = 10;

// This is a placeholder only used in nodes defined inside a
// function. It indicates the attr value will be supplied when
// the function is instantiated. For example, let us suppose a
// node "N" in function "FN". "N" has an attr "A" with value
// placeholder = "foo". When FN is instantiated with attr "foo"
// set to "bar", the instantiated node N's attr A will have been
// given the value "bar".
string placeholder = 9;
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NameAttrList {
string name = 1;
map<string, AttrValue> attr = 2;
}
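
A minimal sketch, assuming the generated attr_value.pb.h bindings, of three ways an AttrValue can be filled (scalar, list, placeholder):

#include "attr_value.pb.h"  // assumed protoc output

void AttrValueKinds() {
  domi::tensorflow::AttrValue scalar;
  scalar.set_type(domi::tensorflow::DT_FLOAT);  // a "type" attr

  domi::tensorflow::AttrValue list;
  list.mutable_list()->add_i(1);                // a "list(int)" attr
  list.mutable_list()->add_i(2);

  domi::tensorflow::AttrValue deferred;
  deferred.set_placeholder("foo");  // bound when the function is instantiated
}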

+ 0
- 100
ge/common/proto/tensorflow/function.proto

@@ -1,100 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "FunctionProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";
import "node_def.proto";
import "op_def.proto";

// A library is a set of named functions.
message FunctionDefLibrary {
repeated FunctionDef function = 1;
repeated GradientDef gradient = 2;
}

// A function can be instantiated when the runtime can bind every attr
// with a value. When a GraphDef has a call to a function, it must
// have binding for every attr defined in the signature.
// * device spec, etc.
message FunctionDef {
// The definition of the function's name, arguments, return values,
// attrs etc.
OpDef signature = 1;

// Attributes specific to this function definition.
map<string, AttrValue> attr = 5;

// NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21.
reserved 2;

// In both of the following fields, there is the need to specify an
// output that is used as either the input to another node (in
// `node_def`) or as a return value of the function (in `ret`).
// Unlike the NodeDefs in GraphDef, we need to be able to specify a
// list in some cases (instead of just single outputs). Also, we
// need to be able to deal with lists of unknown length (so the
// output index may not be known at function definition time). So
// we use the following format instead:
// * "fun_in" where "fun_in" is the name of a function input arg in
// the `signature` field above. This represents that input, whether
// it is a single tensor or a list.
// * "fun_in:0" gives the first element of a function input arg (a
// non-list input is considered a list of length 1 for these
// purposes).
// * "node:out" where "node" is the name of a node in `node_def` and
// "out" is the name one of its op's output arguments (the name
// comes from the OpDef of the node's op). This represents that
// node's output, whether it is a single tensor or a list.
// Note: We enforce that an op's output arguments are never
// renamed in the backwards-compatibility test.
// * "node:out:0" gives the first element of a node output arg (a
// non-list output is considered a list of length 1 for these
// purposes).
//
// NOT CURRENTLY SUPPORTED (but may be in the future):
// * "node:out:-1" gives last element in a node output list
// * "node:out:1:" gives a list with all but the first element in a
// node output list
// * "node:out::-1" gives a list with all but the last element in a
// node output list

// The body of the function. Unlike the NodeDefs in a GraphDef, attrs
// may have values of type `placeholder` and the `input` field uses
// the "output" format above.

// By convention, "op" in node_def is resolved by consulting with a
// user-defined library first. If not resolved, "func" is assumed to
// be a builtin op.
repeated NodeDef node_def = 3;

// A mapping from the output arg names from `signature` to the
// outputs from `node_def` that should be returned by the function.
map<string, string> ret = 4;
}

// GradientDef defines the gradient function of a function defined in
// a function library.
//
// A gradient function g (specified by gradient_func) for a function f
// (specified by function_name) must follow the following:
//
// The function 'f' must be a numerical function which takes N inputs
// and produces M outputs. Its gradient function 'g' takes N + M
// inputs and produces N outputs.
//
// I.e. if we have
// (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),
// then, g is
// (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,
// dL/dy1, dL/dy2, ..., dL/dy_M),
// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
// loss function). dL/dx_i is the partial derivative of L with respect
// to x_i.
message GradientDef {
string function_name = 1; // The function name.
string gradient_func = 2; // The gradient function's name.
}
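
To make the ret map and gradient registration concrete, a hedged C++ sketch against the assumed function.pb.h bindings ("Square" and "SquareGrad" are hypothetical names):

#include "function.pb.h"  // assumed protoc output

domi::tensorflow::FunctionDefLibrary BuildLibrary() {
  domi::tensorflow::FunctionDefLibrary lib;
  domi::tensorflow::FunctionDef* fn = lib.add_function();
  fn->mutable_signature()->set_name("Square");
  // Map the signature's output arg "y" to node output "mul:z:0".
  (*fn->mutable_ret())["y"] = "mul:z:0";
  // Register "SquareGrad" as the gradient of "Square".
  domi::tensorflow::GradientDef* grad = lib.add_gradient();
  grad->set_function_name("Square");
  grad->set_gradient_func("SquareGrad");
  return lib;
}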

+ 0
- 56
ge/common/proto/tensorflow/graph.proto

@@ -1,56 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "GraphProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "node_def.proto";
import "function.proto";
import "versions.proto";

// Represents the graph of operations
message GraphDef {
repeated NodeDef node = 1;

// Compatibility versions of the graph. See core/public/version.h for version
// history. The GraphDef version is distinct from the TensorFlow version, and
// each release of TensorFlow will support a range of GraphDef versions.
VersionDef versions = 4;

// Deprecated single version field; use versions above instead. Since all
// GraphDef changes before "versions" was introduced were forward
// compatible, this field is entirely ignored.
int32 version = 3 [deprecated = true];

// EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
//
// "library" provides user-defined functions.
//
// Naming:
// * library.function.name are in a flat namespace.
// NOTE: We may need to change it to be hierarchical to support
// different orgs. E.g.,
// { "/google/nn", { ... }},
// { "/google/vision", { ... }}
// { "/org_foo/module_bar", { ... }}
// map<string, FunctionDefLib> named_lib;
// * If node[i].op is the name of one function in "library",
// node[i] is deemed as a function call. Otherwise, node[i].op
// must be a primitive operation supported by the runtime.
//
//
// Function call semantics:
//
// * The callee may start execution as soon as some of its inputs
// are ready. The caller may want to use the Tuple() mechanism to
// ensure all inputs are ready at the same time.
//
// * The consumer of return values may start executing as soon as
// the return values the consumer depends on are ready. The
// consumer may want to use Tuple() mechanism to ensure the
// consumer does not start until all return values of the callee
// function are ready.
FunctionDefLibrary library = 2;
};
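
A small illustrative sketch (graph.pb.h assumed) of building a GraphDef with a versions stamp and one node:

#include "graph.pb.h"  // assumed protoc output

domi::tensorflow::GraphDef BuildGraph() {
  domi::tensorflow::GraphDef graph;
  graph.mutable_versions()->set_producer(21);  // illustrative version
  domi::tensorflow::NodeDef* node = graph.add_node();
  node->set_name("x");
  node->set_op("Placeholder");
  return graph;
}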

+ 0
- 14
ge/common/proto/tensorflow/graph_library.proto

@@ -1,14 +0,0 @@
syntax = "proto3";

package domi.tensorflow;

import "graph.proto";

message GeGraphDef {
string name = 1;
GraphDef graph = 2;
}

message GraphDefLibrary {
repeated GeGraphDef graph_def = 1;
};

+ 0
- 63
ge/common/proto/tensorflow/node_def.proto

@@ -1,63 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "NodeProto";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";

message NodeDef {
// The name given to this operator. Used for naming inputs,
// logging, visualization, etc. Unique within a single GraphDef.
// Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*".
string name = 1;

// The operation name. There may be custom parameters in attrs.
// Op names starting with an underscore are reserved for internal use.
string op = 2;

// Each input is "node:src_output" with "node" being a string name and
// "src_output" indicating which output tensor to use from "node". If
// "src_output" is 0 the ":0" suffix can be omitted. Regular inputs
// may optionally be followed by control inputs that have the format
// "^node".
repeated string input = 3;

// A (possibly partial) specification for the device on which this
// node should be placed.
// The expected syntax for this string is as follows:
//
// DEVICE_SPEC ::= PARTIAL_SPEC
//
// PARTIAL_SPEC ::= ("/" CONSTRAINT) *
// CONSTRAINT ::= ("job:" JOB_NAME)
// | ("replica:" [1-9][0-9]*)
// | ("task:" [1-9][0-9]*)
// | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") )
//
// Valid values for this string include:
// * "/job:worker/replica:0/task:1/device:GPU:3" (full specification)
// * "/job:worker/device:GPU:3" (partial specification)
// * "" (no specification)
//
// If the constraints do not resolve to a single device (or if this
// field is empty or not present), the runtime will attempt to
// choose a device automatically.
string device = 4;

// Operation-specific graph-construction-time configuration.
// Note that this should include all attrs defined in the
// corresponding OpDef, including those with a value matching
// the default -- this allows the default to change and makes
// NodeDefs easier to interpret on their own. However, if
// an attr with a default is not specified in this list, the
// default will be used.
// The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and
// one of the names from the corresponding OpDef's attr field).
// The values must have a type matching the corresponding OpDef
// attr's type field.
// Add some examples here showing best practices.
map<string, AttrValue> attr = 5;
};
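
The input and device string formats described above can be exercised with a short sketch (node_def.pb.h assumed; node names are illustrative):

#include "node_def.pb.h"  // assumed protoc output

void FillNode(domi::tensorflow::NodeDef* node) {
  node->set_name("matmul_1");
  node->set_op("MatMul");
  node->add_input("a");       // regular input; ":0" suffix omitted
  node->add_input("b:1");     // second output tensor of node "b"
  node->add_input("^init");   // control input
  node->set_device("/job:worker/device:GPU:3");  // partial device spec
  (*node->mutable_attr())["T"].set_type(domi::tensorflow::DT_FLOAT);
}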

+ 0
- 164
ge/common/proto/tensorflow/op_def.proto

@@ -1,164 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "OpDefProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";
import "types.proto";

// Defines an operation. A NodeDef in a GraphDef specifies an Op by
// using the "op" field which should match the name of a OpDef.
// LINT.IfChange
message OpDef {
// Op names starting with an underscore are reserved for internal use.
// Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*".
string name = 1;

// For describing inputs and outputs.
message ArgDef {
// Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*".
string name = 1;

// Human readable description.
string description = 2;

// Describes the type of one or more tensors that are accepted/produced
// by this input/output arg. The only legal combinations are:
// * For a single tensor: either the "type" field is set or the
// "type_attr" field is set to the name of an attr with type "type".
// * For a sequence of tensors with the same type: the "number_attr"
// field will be set to the name of an attr with type "int", and
// either the "type" or "type_attr" field will be set as for
// single tensors.
// * For a sequence of tensors, the "type_list_attr" field will be set
// to the name of an attr with type "list(type)".
DataType type = 3;
string type_attr = 4; // if specified, attr must have type "type"
string number_attr = 5; // if specified, attr must have type "int"
// If specified, attr must have type "list(type)", and none of
// type, type_attr, and number_attr may be specified.
string type_list_attr = 6;

// For inputs: if true, the inputs are required to be refs.
// By default, inputs can be either refs or non-refs.
// For outputs: if true, outputs are refs, otherwise they are not.
bool is_ref = 16;
};

// Description of the input(s).
repeated ArgDef input_arg = 2;

// Description of the output(s).
repeated ArgDef output_arg = 3;

// Description of the graph-construction-time configuration of this
// Op. That is to say, this describes the attr fields that will
// be specified in the NodeDef.
message AttrDef {
// A descriptive name for the argument. May be used, e.g. by the
// Python client, as a keyword argument name, and so should match
// the regexp "[a-z][a-z0-9_]+".
string name = 1;

// One of the type names from attr_value.proto ("string", "list(string)",
// "int", etc.).
string type = 2;

// A reasonable default for this attribute if the user does not supply
// a value. If not specified, the user must supply a value.
AttrValue default_value = 3;

// Human-readable description.
string description = 4;


// --- Constraints ---
// These constraints are only in effect if specified. Default is no
// constraints.

// For type == "int", this is a minimum value. For "list(___)"
// types, this is the minimum length.
bool has_minimum = 5;
int64 minimum = 6;

// The set of allowed values. Has type that is the "list" version
// of the "type" field above (uses the "list" field of AttrValue).
// If type == "type" or "list(type)" above, then the "type" field
// of "allowed_values.list" has the set of allowed DataTypes.
// If type == "string" or "list(string)", then the "s" field of
// "allowed_values.list" has the set of allowed strings.
AttrValue allowed_values = 7;
}
repeated AttrDef attr = 4;

// Optional deprecation based on GraphDef versions.
OpDeprecation deprecation = 8;

// One-line human-readable description of what the Op does.
string summary = 5;

// Additional, longer human-readable description of what the Op does.
string description = 6;

// -------------------------------------------------------------------------
// Which optimizations this operation can participate in.

// True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs)
bool is_commutative = 18;

// If is_aggregate is true, then this operation accepts N >= 2
// inputs and produces 1 output all of the same type. Should be
// associative and commutative, and produce output with the same
// shape as the input. The optimizer may replace an aggregate op
// taking input from multiple devices with a tree of aggregate ops
// that aggregate locally within each device (and possibly within
// groups of nearby devices) before communicating.
bool is_aggregate = 16; // for things like add

// Other optimizations go here, like
// can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc.

// -------------------------------------------------------------------------
// Optimization constraints.

// Ops are marked as stateful if their behavior depends on some state beyond
// their input tensors (e.g. variable reading op) or if they have
// a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops
// must always produce the same output for the same input and have
// no side-effects.
//
// By default Ops may be moved between devices. Stateful ops should
// either not be moved, or should only be moved if that state can also
// be moved (e.g. via some sort of save / restore).
// Stateful ops are guaranteed to never be optimized away by Common
// Subexpression Elimination (CSE).
bool is_stateful = 17; // for things like variables, queue

// -------------------------------------------------------------------------
// Non-standard options.

// By default, all inputs to an Op must be initialized Tensors. Ops
// that may initialize tensors for the first time should set this
// field to true, to allow the Op to take an uninitialized Tensor as
// input.
bool allows_uninitialized_input = 19; // for Assign, etc.
};
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc)

// Information about version-dependent deprecation of an op
message OpDeprecation {
// First GraphDef version at which the op is disallowed.
int32 version = 1;

// Explanation of why it was deprecated and what to use instead.
string explanation = 2;
};

// A collection of OpDefs
message OpList {
repeated OpDef op = 1;
};
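
A hypothetical sketch (op_def.pb.h assumed) of an aggregate op definition that uses the type_attr/number_attr mechanism and the minimum constraint described above:

#include "op_def.pb.h"  // assumed protoc output

domi::tensorflow::OpDef DefineAddN() {
  domi::tensorflow::OpDef op;
  op.set_name("AddN");
  domi::tensorflow::OpDef::ArgDef* in = op.add_input_arg();
  in->set_name("inputs");
  in->set_type_attr("T");    // element type supplied by attr "T"
  in->set_number_attr("N");  // sequence length supplied by attr "N"
  domi::tensorflow::OpDef::ArgDef* out = op.add_output_arg();
  out->set_name("sum");
  out->set_type_attr("T");
  domi::tensorflow::OpDef::AttrDef* n = op.add_attr();
  n->set_name("N");
  n->set_type("int");
  n->set_has_minimum(true);
  n->set_minimum(2);         // N >= 2, as is_aggregate requires
  op.set_is_commutative(true);
  op.set_is_aggregate(true);
  return op;
}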

+ 0
- 29
ge/common/proto/tensorflow/resource_handle.proto

@@ -1,29 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "ResourceHandle";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Protocol buffer representing a handle to a tensorflow resource. Handles are
// not valid across executions, but can be serialized back and forth from within
// a single run.
message ResourceHandleProto {
// Unique name for the device containing the resource.
string device = 1;

// Container in which this resource is placed.
string container = 2;

// Unique name of this resource.
string name = 3;

// Hash code for the type of the resource. Is only valid in the same device
// and in the same execution.
uint64 hash_code = 4;

// For debug-only, the name of the type pointed to by this handle, if
// available.
string maybe_type_name = 5;
};

+ 0
- 94
ge/common/proto/tensorflow/tensor.proto

@@ -1,94 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TensorProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "resource_handle.proto";
import "tensor_shape.proto";
import "types.proto";

// Protocol buffer representing a tensor.
message TensorProto {
DataType dtype = 1;

// Shape of the tensor.
TensorShapeProto tensor_shape = 2;

// Only one of the representations below is set, one of "tensor_contents" and
// the "xxx_val" attributes. We are not using oneof because as oneofs cannot
// contain repeated fields it would require another extra set of messages.

// Version number.
//
// In version 0, if the "repeated xxx" representations contain only one
// element, that element is repeated to fill the shape. This makes it easy
// to represent a constant Tensor with a single value.
int32 version_number = 3;

// Serialized raw tensor content from either Tensor::AsProtoTensorContent or
// memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
// can be used for all tensor types. The purpose of this representation is to
// reduce serialization overhead during RPC call by avoiding serialization of
// many repeated small items.
bytes tensor_content = 4;

// Type specific representations that make it easy to create tensor protos in
// all languages. Only the representation corresponding to "dtype" can
// be set. The values hold the flattened representation of the tensor in
// row major order.

// DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
// have some pointless zero padding for each value here.
repeated int32 half_val = 13 [packed = true];

// DT_FLOAT.
repeated float float_val = 5 [packed = true];

// DT_DOUBLE.
repeated double double_val = 6 [packed = true];

// DT_INT32, DT_INT16, DT_INT8, DT_UINT8.
repeated int32 int_val = 7 [packed = true];

// DT_STRING
repeated bytes string_val = 8;

// DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real
// and imaginary parts of i-th single precision complex.
repeated float scomplex_val = 9 [packed = true];

// DT_INT64
repeated int64 int64_val = 10 [packed = true];

// DT_BOOL
repeated bool bool_val = 11 [packed = true];

// DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real
// and imaginary parts of i-th double precision complex.
repeated double dcomplex_val = 12 [packed = true];

// DT_RESOURCE
repeated ResourceHandleProto resource_handle_val = 14;

// DT_VARIANT
repeated VariantTensorDataProto variant_val = 15;

// DT_UINT32
repeated uint32 uint32_val = 16 [packed = true];

// DT_UINT64
repeated uint64 uint64_val = 17 [packed = true];
};

// Protocol buffer representing the serialization format of DT_VARIANT tensors.
message VariantTensorDataProto {
// Name of the type of objects being serialized.
string type_name = 1;
// Portions of the object that are not Tensors.
bytes metadata = 2;
// Tensors contained within objects being serialized.
repeated TensorProto tensors = 3;
}
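
A brief sketch (tensor.pb.h assumed) showing the choice between the typed repeated fields and tensor_content:

#include "tensor.pb.h"  // assumed protoc output

domi::tensorflow::TensorProto MakeConst() {
  domi::tensorflow::TensorProto t;
  t.set_dtype(domi::tensorflow::DT_FLOAT);
  t.mutable_tensor_shape()->add_dim()->set_size(2);
  // Either the typed repeated field matching dtype ...
  t.add_float_val(1.0f);
  t.add_float_val(2.0f);
  // ... or raw bytes in tensor_content -- but not both.
  return t;
}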

+ 0
- 45
ge/common/proto/tensorflow/tensor_shape.proto

@@ -1,45 +0,0 @@
// Protocol buffer representing the shape of tensors.

syntax = "proto3";
option cc_enable_arenas = true;
option java_outer_classname = "TensorShapeProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

package domi.tensorflow;

// Dimensions of a tensor.
message TensorShapeProto {
// One dimension of the tensor.
message Dim {
// Size of the tensor in that dimension.
// This value must be >= -1, but values of -1 are reserved for "unknown"
// shapes (values of -1 mean "unknown" dimension). Certain wrappers
// that work with TensorShapeProto may fail at runtime when deserializing
// a TensorShapeProto containing a dim value of -1.
int64 size = 1;

// Optional name of the tensor dimension.
string name = 2;
};

// Dimensions of the tensor, such as {"input", 30}, {"output", 40}
// for a 30 x 40 2D tensor. If an entry has size -1, this
// corresponds to a dimension of unknown size. The names are
// optional.
//
// The order of entries in "dim" matters: It indicates the layout of the
// values in the tensor in-memory representation.
//
// The first entry in "dim" is the outermost dimension used to layout the
// values, the last entry is the innermost dimension. This matches the
// in-memory layout of RowMajor Eigen tensors.
//
// If "dim.size()" > 0, "unknown_rank" must be false.
repeated Dim dim = 2;

// If true, the number of dimensions in the shape is unknown.
//
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
};
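
A short sketch (tensor_shape.pb.h assumed) of the three shape cases the comments describe:

#include "tensor_shape.pb.h"  // assumed protoc output

void Shapes() {
  domi::tensorflow::TensorShapeProto known;    // fully known 30 x 40
  known.add_dim()->set_size(30);
  known.add_dim()->set_size(40);

  domi::tensorflow::TensorShapeProto partial;  // rank 2, unknown first dim
  partial.add_dim()->set_size(-1);
  partial.add_dim()->set_size(10);

  domi::tensorflow::TensorShapeProto unknown;  // rank itself unknown
  unknown.set_unknown_rank(true);              // dim must stay empty
}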

+ 0
- 74
ge/common/proto/tensorflow/types.proto

@@ -1,74 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TypesProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// LINT.IfChange
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
DT_INVALID = 0;

// Data types that all computation devices are expected to be
// capable to support.
DT_FLOAT = 1;
DT_DOUBLE = 2;
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_STRING = 7;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
DT_RESOURCE = 20;
DT_VARIANT = 21; // Arbitrary C++ data types
DT_UINT32 = 22;
DT_UINT64 = 23;

// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_FLOAT_REF = 101;
DT_DOUBLE_REF = 102;
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_STRING_REF = 107;
DT_COMPLEX64_REF = 108;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
DT_RESOURCE_REF = 120;
DT_VARIANT_REF = 121;
DT_UINT32_REF = 122;
DT_UINT64_REF = 123;
}
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/c/c_api.h,
// https://www.tensorflow.org/code/tensorflow/go/tensor.go,
// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.h,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc,
// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py,
// https://www.tensorflow.org/code/tensorflow/python/framework/function.py)

+ 0
- 31
ge/common/proto/tensorflow/versions.proto

@@ -1,31 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "VersionsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Version information for a piece of serialized data
//
// There are different types of versions for each type of data
// (GraphDef, etc.), but they all have the same common shape
// described here.
//
// Each consumer has "consumer" and "min_producer" versions (specified
// elsewhere). A consumer is allowed to consume this data if
//
// producer >= min_producer
// consumer >= min_consumer
// consumer not in bad_consumers
//
message VersionDef {
// The version of the code that produced this data.
int32 producer = 1;

// Any consumer below this version is not allowed to consume this data.
int32 min_consumer = 2;

// Specific consumer versions which are disallowed (e.g. due to bugs).
repeated int32 bad_consumers = 3;
};

+ 0
- 115
ge/executor/CMakeLists.txt

@@ -1,115 +0,0 @@
set(PROTO_LIST
"${METADEF_DIR}/proto/om.proto"
"${METADEF_DIR}/proto/ge_ir.proto"
"${METADEF_DIR}/proto/insert_op.proto"
"${METADEF_DIR}/proto/task.proto"
"${METADEF_DIR}/proto/op_mapping_info.proto"
"${METADEF_DIR}/proto/dump_task.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRC_LIST
"ge_executor.cc"
"../common/profiling/profiling_manager.cc"
"../common/ge/plugin_manager.cc"
"../common/ge/op_tiling_manager.cc"
"../common/dump/dump_properties.cc"
"../common/dump/dump_manager.cc"
"../common/dump/dump_op.cc"
"../graph/load/graph_loader.cc"
"../graph/execute/graph_execute.cc"
"../omm/csa_interact.cc"
"../graph/manager/graph_manager_utils.cc"
"../graph/manager/graph_var_manager.cc"
"../graph/manager/graph_mem_allocator.cc"
"../graph/manager/graph_caching_allocator.cc"
"../graph/manager/trans_var_data_utils.cc"
"../graph/manager/util/debug.cc"
"../graph/manager/rdma_pool_allocator.cc"
"../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
"../model/ge_model.cc"
"../model/ge_root_model.cc"
"../graph/load/new_model_manager/davinci_model.cc"
"../graph/load/new_model_manager/davinci_model_parser.cc"
"../graph/load/new_model_manager/model_manager.cc"
"../graph/load/new_model_manager/tbe_handle_store.cc"
"../graph/load/new_model_manager/cpu_queue_schedule.cc"
"../graph/load/new_model_manager/model_utils.cc"
"../graph/load/new_model_manager/aipp_utils.cc"
"../graph/load/new_model_manager/data_inputer.cc"
"../graph/load/new_model_manager/data_dumper.cc"
"../graph/load/new_model_manager/zero_copy_task.cc"
"../graph/load/new_model_manager/zero_copy_offset.cc"
"../graph/load/new_model_manager/task_info/task_info.cc"
"../graph/load/new_model_manager/task_info/event_record_task_info.cc"
"../graph/load/new_model_manager/task_info/event_wait_task_info.cc"
"../graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
"../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
"../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
"../graph/load/new_model_manager/task_info/kernel_task_info.cc"
"../graph/load/new_model_manager/task_info/label_set_task_info.cc"
"../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
"../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
"../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
"../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
"../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_active_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
"../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
"../graph/load/new_model_manager/task_info/end_graph_task_info.cc"
"../graph/load/new_model_manager/task_info/model_exit_task_info.cc"
"../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
"../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
"../opskernel_manager/ops_kernel_builder_manager.cc"
"../single_op/single_op_manager.cc"
"../single_op/single_op_model.cc"
"../single_op/single_op.cc"
"../single_op/stream_resource.cc"
"../single_op/task/op_task.cc"
"../single_op/task/build_task_utils.cc"
"../single_op/task/tbe_task_builder.cc"
"../single_op/task/aicpu_task_builder.cc"
"../single_op/task/aicpu_kernel_task_builder.cc"
"../hybrid/hybrid_davinci_model_stub.cc"
)

######## libge_executor.a ########
add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_executor PRIVATE
-Werror
-O2
)

target_compile_definitions(ge_executor PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
DAVINCI_SUPPORT_PROFILING
)

target_include_directories(ge_executor PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/cce
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_executor PRIVATE
$<BUILD_INTERFACE:intf_pub>
json
protobuf
c_sec
-lrt
-ldl
)

+ 0
- 206
ge/executor/proto/ge_ir.proto

@@ -1,206 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package ge.proto;

enum DataType
{
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
DT_FLOAT = 1; // float type
DT_FLOAT16 = 2; // fp16 type
DT_INT8 = 3; // int8 type
DT_UINT8 = 4; // uint8 type
DT_INT16 = 5; // int16 type
DT_UINT16 = 6; // uint16 type
DT_INT32 = 7; // int32 type
DT_INT64 = 8; // int64 type
DT_UINT32 = 9; // unsigned int32
DT_UINT64 = 10; // unsigned int64
DT_BOOL = 11; // bool type
DT_DOUBLE = 12; // double type
DT_STRING = 13; // string type
DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */
DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */
DT_COMPLEX64 = 16; // complex64 type
DT_COMPLEX128 = 17; // complex128 type
DT_QINT8 = 18; // qint8 type
DT_QINT16 = 19; // qint16 type
DT_QINT32 = 20; // qint32 type
DT_QUINT8 = 21; // quint8 type
DT_QUINT16 = 22; // quint16 type
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
}

message AttrDef
{
message ListValue
{
enum ListValueType{
VT_LIST_NONE = 0;
VT_LIST_STRING = 1;
VT_LIST_INT = 2;
VT_LIST_FLOAT = 3;
VT_LIST_BOOL = 4;
VT_LIST_BYTES = 5;
VT_LIST_TENSOR_DESC = 6;
VT_LIST_TENSOR = 7;
VT_LIST_GRAPH = 8;
VT_LIST_NAMED_ATTRS = 9;
VT_LIST_DATA_TYPE = 10;
}
repeated bytes s = 2; // "list(string)"
repeated int64 i = 3; // "list(int)"
repeated float f = 4; // "list(float)"
repeated bool b = 5; // "list(bool)"
repeated bytes bt = 7;
repeated TensorDescriptor td = 8;
repeated TensorDef t = 9;
repeated GraphDef g = 10;
repeated NamedAttrs na = 11;
repeated int64 dt = 12; // list ge::DataType

ListValueType val_type = 20;
}

message ListListInt{
message ListInt{
repeated int64 list_i = 1; // list int
}
repeated ListInt list_list_i = 1; // list list int
}

oneof value
{
bytes s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10; // Used to support attr nesting
TensorDescriptor td = 11; // GeTensorDesc type
TensorDef t = 12; // GeTensor type
GraphDef g = 13; // Graph type
ListListInt list_list_int = 14; // List List Int type
int64 dt = 15; // ge::DataType
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs
{
string name = 1;
map<string, AttrDef> attr = 2;
}

// Shape / dimension description, using row-major order
message ShapeDef
{
repeated int64 dim = 1; // Size of each dimension
}

// Multidimensional data description
message TensorDescriptor
{
string name = 1; // Optional parameter, tensor name

DataType dtype = 2; // tensor datatype
ShapeDef shape = 3; // Shape / dimension
string layout = 4; // Tensor format, eg: "NCHW", "NHWC", "CHW", "ND"

bool has_out_attr = 9;
int64 size = 10;
int64 weight_size = 11;
bool reuse_input = 12;
bool output_tensor = 13;
string device_type = 14;
bool input_tensor = 15;
int64 real_dim_cnt = 16;
int64 reuse_input_index = 17;
int64 data_offset = 18;
int64 cmps_size = 19;
string cmps_tab = 20;
int64 cmps_tab_offset = 21;

map<string, AttrDef> attr = 5; // Set of extra parameter fields
}

// GeTensor definition
message TensorDef
{
TensorDescriptor desc = 1; // Tensor description
bytes data = 2; // Tensor data
}


// Operator description
message OpDef
{
string name = 1; // name
string type = 2; // type

repeated string input = 5; // inputs: original op name + output index, in the form op_name:index

map<string, AttrDef> attr = 10; // Set of operator parameter fields

bool has_out_attr = 20;
int64 id = 21;
int64 stream_id = 22;
repeated string input_name = 23;
repeated string src_name = 24;
repeated int64 src_index = 25;
repeated string dst_name = 26;
repeated int64 dst_index = 27;
repeated int64 input_i = 28;
repeated int64 output_i = 29;
repeated int64 workspace = 30;
repeated int64 workspace_bytes = 31;
repeated bool is_input_const = 32;
repeated TensorDescriptor input_desc = 33;
repeated TensorDescriptor output_desc = 34;
repeated string subgraph_name = 35;
}

// Graph definition
message GraphDef
{
string name = 1; // name

repeated string input = 4; // Graph input
repeated string output = 5; // Graph output

repeated OpDef op = 6; // List of operators

map<string, AttrDef> attr = 11; // Extended field
}

// model definition
message ModelDef
{
string name = 1; // name
uint32 version = 2; // IR proto version
string custom_version = 3; // User model version number, passed in by user

repeated GraphDef graph = 7; // Graph definition; graph[0] is the root graph of the ModelDef

map<string, AttrDef> attr = 11; // Extended field
}
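
A minimal C++ sketch, assuming the ge_ir.pb.h bindings, of a ModelDef whose root graph wires one op to another via the op_name:index convention:

#include "ge_ir.pb.h"  // assumed protoc output

ge::proto::ModelDef BuildModel() {
  ge::proto::ModelDef model;
  model.set_name("demo");
  model.set_version(1);
  ge::proto::GraphDef* graph = model.add_graph();  // graph[0] is the root graph
  graph->set_name("demo_graph");
  ge::proto::OpDef* data = graph->add_op();
  data->set_name("x");
  data->set_type("Data");
  ge::proto::OpDef* relu = graph->add_op();
  relu->set_name("relu_1");
  relu->set_type("Relu");
  relu->add_input("x:0");  // op_name:output_index
  return model;
}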


+ 0
- 152
ge/executor/proto/insert_op.proto

@@ -1,152 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message InsertNewOps {
repeated AippOpParams aipp_op = 1;
repeated MultiShapeOpParams multi_shape_op = 2;
}

message AippOpParams {
enum InputFormat {
UNDEFINED = 0;
YUV420SP_U8 = 1;
XRGB8888_U8 = 2;
RGB888_U8 = 3;
YUV400_U8 = 4;
NC1HWC0DI_FP16 = 5;
NC1HWC0DI_S8 = 6;
ARGB8888_U8 = 7;
YUYV_U8 = 8;
YUV422SP_U8 = 9;
AYUV444_U8 = 10;
RAW10 = 11;
RAW12 = 12;
RAW16 = 13;
RAW24 = 14;
RGB16 = 15;
RGB20 = 16;
RGB24 = 17;
RGB8_IR = 18;
RGB16_IR = 19;
RGB24_IR = 20;
}

enum AippMode {
undefined = 0;
static = 1;
dynamic = 2;
}

// AIPP mode: distinguishes static AIPP from dynamic AIPP
AippMode aipp_mode = 1;

// related_input_rank is required; integer, range >= 0 and <= the number of input Data operators, default 0.
// It identifies which model input receives AIPP processing; e.g. if the model has two inputs and AIPP is needed on the second, set related_input_rank to 1.
uint32 related_input_rank = 2;

// input_edge_idx is optional; integer, range >= 0.
// It applies different AIPP processing to different outputs of the Data operator; if unset, AIPP is applied to all output edges of the model input selected by related_input_rank.
// The configured value must be <= the number of output edges of the Data operator.
repeated uint32 input_edge_idx = 3;

// [Begin] dynamic AIPP parameters; ignored when static AIPP is configured
uint32 max_src_image_size = 4;

// Whether rotation is supported. Disabled by default; enabling rotation incurs extra memory and performance cost.
bool support_rotation = 5;

// [End] dynamic AIPP parameters


// [Begin] static AIPP parameters; ignored when dynamic AIPP is configured
InputFormat input_format = 51;
bool csc_switch = 52;
float cpadding_value = 53;
bool rbuv_swap_switch = 54;
bool ax_swap_switch = 55;
bool single_line_mode = 56;

int32 src_image_size_w = 57;
int32 src_image_size_h = 58;

bool crop = 59;
int32 load_start_pos_w = 60;
int32 load_start_pos_h = 61;
int32 crop_size_w = 62;
int32 crop_size_h = 63;

bool resize = 64;
int32 resize_output_w = 65;
int32 resize_output_h = 66;

bool padding = 67;
int32 left_padding_size = 68;
int32 right_padding_size = 69;
int32 top_padding_size = 70;
int32 bottom_padding_size = 71;

int32 mean_chn_0 = 10;
int32 mean_chn_1 = 11;
int32 mean_chn_2 = 12;
int32 mean_chn_3 = 19;
float min_chn_0 = 13;
float min_chn_1 = 14;
float min_chn_2 = 15;
float min_chn_3 = 20;
repeated float var_reci_chn_0 = 16;
repeated float var_reci_chn_1 = 17;
repeated float var_reci_chn_2 = 18;
repeated float var_reci_chn_3 = 21;

repeated int32 matrix_r0c0 = 30;
repeated int32 matrix_r0c1 = 31;
repeated int32 matrix_r0c2 = 32;
repeated int32 matrix_r1c0 = 33;
repeated int32 matrix_r1c1 = 34;
repeated int32 matrix_r1c2 = 35;
repeated int32 matrix_r2c0 = 36;
repeated int32 matrix_r2c1 = 37;
repeated int32 matrix_r2c2 = 38;
repeated int32 output_bias_0 = 39;
repeated int32 output_bias_1 = 40;
repeated int32 output_bias_2 = 41;
repeated int32 input_bias_0 = 42;
repeated int32 input_bias_1 = 43;
repeated int32 input_bias_2 = 44;

// [End] static AIPP parameters

// The n value used when transforming raw/rgbir data to f16.
// The transformation equation is x/(2^n). If set to 0, no transform is performed.
uint32 raw_rgbir_to_f16_n = 45;
}

message MultiShapeOpParams {
enum MultiShapeMode {
batch = 0; // dynamic batch
resolution = 1; // dynamic resolution, reserved for extension
}

MultiShapeMode mode = 1; // operator mode
uint32 related_input_rank = 2; // which input the new operator is inserted on


repeated uint32 batch_list = 11; // batch_list values; the number of entries must be between 2 and 8
}
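
A hedged sketch (insert_op.pb.h assumed) of a static AIPP configuration using the fields above; all values are illustrative only:

#include "insert_op.pb.h"  // assumed protoc output

domi::InsertNewOps ConfigureStaticAipp() {
  domi::InsertNewOps ops;
  domi::AippOpParams* aipp = ops.add_aipp_op();
  aipp->set_related_input_rank(0);  // apply AIPP to the first Data input
  aipp->set_input_format(domi::AippOpParams::RGB888_U8);
  aipp->set_src_image_size_w(224);  // illustrative image size
  aipp->set_src_image_size_h(224);
  aipp->set_csc_switch(true);       // enable color-space conversion
  return ops;
}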

+ 0
- 401
ge/executor/proto/om.proto

@@ -1,401 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

enum TargetType
{
MINI = 0;
TINY = 1;
LITE = 2;
}

// offline model
message ModelDef {
string name = 1;
uint32 version = 2;

uint64 memory_size = 10;
uint32 stream_num = 11;
uint32 event_num = 12;
uint64 weight_size = 13;
uint32 label_num = 15;
repeated OpDef op = 20;
TargetType target_type = 23;

map<string, AttrDef> attr = 30;
};

// operator define
message OpDef {
string name = 1;
string type = 2;

uint32 id = 3;
uint32 stream_id = 4;

repeated string input_name = 5;

repeated string src_name = 8;
repeated int32 src_index = 9;
repeated int64 input = 10;
repeated int64 output = 11;
repeated TensorDescriptor input_desc = 12;
repeated TensorDescriptor output_desc = 13;
repeated WeightDef weights = 14;
repeated string dst_name = 15;
repeated int32 dst_index = 16;

repeated int64 workspace = 20;
repeated uint32 workspace_bytes = 21;

repeated string weight_name = 22;
repeated bool is_input_const = 23;

map<string, AttrDef> attr = 30;

QuantizeFactorParams quantize_factor = 31;

oneof op_params {
// start at 100 here
SendOpParams sender_param = 100;
RecvOpParams receiver_param = 200;
ConvolutionOpParams convolution_param = 300;
PoolingOpParams pooling_param = 400;
EltwiseOpParams eltwise_param = 500;
BatchNormOpParams batchnorm_param = 600;
ScaleOpParams scale_param = 700;
FullConnectionOpParams full_connection_param = 800;
SoftmaxOpParams softmax_param = 900;
ActivationOpParams activation_param = 1000;
ReshapeOpParams reshape_param = 1100;
}
};

message SendOpParams {
uint32 event_id = 1;
};

message RecvOpParams {
uint32 event_id = 1;
};

enum QuantizeScaleType
{
VECTOR_SCALE = 0;
SCALAR_SCALE = 1;
}

enum QuantizeScaleMode
{
NORMAL_MODE = 0;
SQRT_MODE = 1;
}

enum QuantizeAlgorithm
{
NON_OFFSET_ALGO = 0;
HALF_OFFSET_ALGO = 1;
ALL_OFFSET_ALGO = 2;
}
message QuantizeFactor
{
QuantizeScaleMode scale_mode = 1;
bytes scale_value = 2;
int64 scale_offset = 3;
bytes offset_data_value = 4;
int64 offset_data_offset = 5;
bytes offset_weight_value = 6;
int64 offset_weight_offset = 7;
bytes offset_pad_value = 8;
int64 offset_pad_offset = 9;
};

message QuantizeCalcFactor
{
bytes offsetw = 1;
int64 offsetw_offset = 2;
bytes offsetd = 3;
int64 offsetd_offset = 4;
bytes scalereq = 5;
int64 scaledreq_offset = 6;
bytes offsetdnext = 7;
int64 offsetdnext_offset = 8;
}

message QuantizeFactorParams
{
QuantizeAlgorithm quantize_algo = 1;
QuantizeScaleType scale_type = 2;
QuantizeFactor quantize_param = 3;
QuantizeFactor dequantize_param = 4;
QuantizeFactor requantize_param = 5;
QuantizeCalcFactor quantizecalc_param = 6;
};

message ConvolutionOpParams {
int32 mode = 1;
int32 algo = 2;
int32 pad_mode = 3;
uint32 group = 4;
uint32 num_output = 5;

repeated uint32 pad = 10;
repeated uint32 stride = 11;
repeated uint32 dilation = 12;
repeated uint32 kernel = 13;

float alpha = 20;
float beta = 21;

WeightDef filter = 40;
WeightDef bias = 41;

bool relu_flag = 62;
repeated uint32 adj = 70;
repeated uint32 target_shape = 71;
repeated uint32 before_pad = 72;
};

message PoolingOpParams {
int32 mode = 1;
int32 nan_opt = 2;
int32 pad_mode = 3;
bool global_pooling = 4;

repeated uint32 window = 10;
repeated uint32 pad = 11;
repeated uint32 stride = 12;
bool ceil_mode = 13;
int32 data_mode = 14;

float alpha = 20;
float beta = 21;
repeated uint32 before_pad = 22;
};

message EltwiseOpParams {
int32 mode = 1;
repeated float coeff = 2;
float alpha = 3;
float beta = 4;
repeated WeightDef weight = 5;
bool relu_flag = 6;
};

message ActivationOpParams {
int32 mode = 1;
float coef = 2;
float alpha = 3;
float beta = 4;
};

message BatchNormOpParams {
int32 mode = 1;

float alpha = 2;
float beta = 3;
double epsilon = 4; // optional, [default = 1e-5]
bool use_global_stats = 5; // optional, default true (testing mode)
float moving_average_fraction = 6; // optional, [default = .999]

WeightDef estimated_mean = 7;
WeightDef estimated_variance = 8;

WeightDef scale = 9;
WeightDef bias = 10;
};

message ScaleOpParams {
WeightDef scale = 1;
WeightDef bias = 2;
};

message ReshapeOpParams {
float alpha = 1;
float beta = 2;
ShapeDef shape = 3;
int32 axis = 4;
int32 num_axes = 5;
int32 format = 6;
};

message SoftmaxOpParams {
int32 algo = 1;
int32 mode = 2;
float alpha = 3;
float beta = 4;
};

message FullConnectionOpParams {
WeightDef filter = 1;
WeightDef bias = 2;
uint32 num_output = 3;
bool relu_flag = 12;
};

message FlattenOpParams {
float alpha = 1;
float beta = 2;
int32 start_axis = 3;
int32 end_axis = 4;
}

message AddLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message MulLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message AddOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message MulOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message SubOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message BiasAddOpParams {
float alpha = 1;
float beta = 2;

WeightDef bias = 10;
};

message MatMulOpParams {
float alpha = 1;
float beta = 2;
bool transposeX = 3;
bool transposeW = 4;

WeightDef filter = 10;
WeightDef bias = 12;
};

message RsqrtOpParams {
float alpha = 1;
float beta = 2;
};


message WeightDef {
int32 format = 1;
int32 data_type = 2;
ShapeDef shape = 3;
bytes data = 4;
int64 data_offset = 5;
uint32 cmps_size = 6;
bytes cmps_tab = 7;
int64 cmps_tab_offset = 10;
CompressInfo cmps_info = 8;
AllOffsetQuantizeInfo alloffset_quantize_info = 11;
}

message ShapeDef {
repeated int64 dim = 1;
}

enum DeviceType {
NPU = 0; // NPU is used by default.
CPU = 1; // CPU
}

message AllOffsetQuantizeInfo {
float scale = 1;
int32 offset = 2;
}

message TensorDescriptor {
int32 format = 1;
int32 data_type = 2;
repeated int64 dim = 3;
uint32 size = 4;
bool reuse_input = 5;
bool output_tensor = 7;
DeviceType device_type = 8;
bool input_tensor = 9;
uint32 real_dim_cnt = 10;
uint32 reuse_input_index = 11;
AllOffsetQuantizeInfo alloffset_quantize_info = 12;
}

message CompressInfo {
int32 blockRow = 1; // block row
int32 blockCol = 2; // block col
int32 fractalK = 3; // fractal K
int32 fractalN = 4; // fractal N
int32 lastFractalK = 5; // K of last fractal
int32 lastFractalN = 6; // N of last fractal
int32 cubeSize = 7; // cube's length
int32 loadDir = 8; // data load direction: 0 = column load, 1 = row load
}

message AttrDef {
message ListValue {
repeated string s = 2; // "list(string)"
repeated int64 i = 3 [packed = true]; // "list(int)"
repeated float f = 4 [packed = true]; // "list(float)"
repeated bool b = 5 [packed = true]; // "list(bool)"
repeated uint32 u = 6 [packed = true]; // "list(uint)"
repeated bytes bt = 7;
}

oneof value {
string s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
uint32 u = 6; // "uint32"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10;
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs {
string name = 1;
map<string, AttrDef> attr = 2;
}
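
A short sketch (om.pb.h assumed) showing how setting one op_params member selects the oneof on this file's OpDef:

#include "om.pb.h"  // assumed protoc output

domi::OpDef BuildConvOp() {
  domi::OpDef op;
  op.set_name("conv1");
  op.set_type("Convolution");
  // Touching convolution_param selects that member of the op_params oneof.
  domi::ConvolutionOpParams* conv = op.mutable_convolution_param();
  conv->set_num_output(64);
  conv->add_kernel(3);  // 3 x 3 kernel, stride 1 (illustrative)
  conv->add_kernel(3);
  conv->add_stride(1);
  conv->add_stride(1);
  return op;
}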


+ 0
- 89
ge/executor/proto/op_mapping_info.proto

@@ -1,89 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";
package aicpu.dump;

message Shape {
repeated uint64 dim = 1;
}

message Output {
int32 data_type = 1;
int32 format = 2;
Shape shape = 3;
uint64 address = 4;
string original_name = 5;
int32 original_output_index = 6;
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
}

message Input {
int32 data_type = 1;
int32 format = 2;
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
}

enum BufferType {
L1 = 0;
}

message OpBuffer {
BufferType buffer_type = 1;
uint64 address = 2;
uint64 size = 3;
}

message Op {
string op_name = 1;
string op_type = 2;
}

message Task {
uint32 task_id = 1;
uint32 stream_id = 2;
Op op = 3;
repeated Output output = 4;
bool end_graph = 5;
repeated Input input = 6;
repeated OpBuffer buffer = 7;
}

message OpMappingInfo {
string dump_path = 1;
oneof model_name_param {
string model_name = 2;
}
oneof model_id_param {
uint32 model_id = 3;
}
oneof step_id {
uint64 step_id_addr = 4;
}
oneof iterations_per_loop {
uint64 iterations_per_loop_addr = 5;
}
oneof loop_cond {
uint64 loop_cond_addr = 6;
}
uint32 flag = 7; // 0x01 load, 0x00 unload
repeated Task task = 8;
string dump_step = 9;
}
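
A minimal sketch (op_mapping_info.pb.h assumed) of a load-time OpMappingInfo for one dumped task; the path and op names are hypothetical:

#include "op_mapping_info.pb.h"  // assumed protoc output

aicpu::dump::OpMappingInfo BuildDumpInfo() {
  aicpu::dump::OpMappingInfo info;
  info.set_dump_path("/var/log/npu/dump");  // illustrative path
  info.set_model_name("demo");
  info.set_flag(0x01);                      // 0x01 load, 0x00 unload
  aicpu::dump::Task* task = info.add_task();
  task->set_task_id(3);
  task->set_stream_id(0);
  task->mutable_op()->set_op_name("conv1");
  task->mutable_op()->set_op_type("Convolution");
  return info;
}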

+ 0
- 170
ge/executor/proto/task.proto

@@ -1,170 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message ModelTaskDef {
string version = 1;

map<string, string> attr = 9; // Extended field
repeated TaskDef task = 10;

uint64 memory_size = 11;
uint32 stream_num = 12;
uint32 event_num = 13;
uint64 weight_size = 14;

repeated bytes op = 15; // input/output opdef in bytes

uint64 base_addr = 16; // base addr
uint64 weight_addr = 17; // weight addr
uint32 batch_num = 18;
}


message TaskDef {
uint32 id = 1;
uint32 type = 2;

uint32 stream_id = 10;
uint32 event_id = 11;

KernelDef kernel = 20;
KernelExDef kernel_ex = 21;
KernelHcclDef kernel_hccl = 25;
EventExDef event_ex = 26;
LogTimeStampDef log_timestamp = 28;

uint32 label_id = 30;

MemcpyAsyncDef memcpy_async = 31;
StreamSwitchDef stream_switch = 32;
StreamActiveDef stream_active = 33;
bytes private_def = 34;
uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future
StreamSwitchNDef stream_switch_n = 36;

LabelSetDef label_set = 37;
LabelGotoExDef label_goto_ex = 38;
LabelSwitchByIndexDef label_switch_by_index = 39;
}

message KernelDef {
KernelContext context = 1;

string stub_func = 10;
uint32 block_dim = 11;
uint32 args_size = 12;
bytes args = 13;
bytes sm_desc = 14;
bytes flowtable = 15;
string so_name = 16;
string kernel_name = 17;
bytes kernel_ext_info = 18;
uint32 kernel_ext_info_size = 19;
}

message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2; // OP type in CCE
uint32 kernel_func_id = 3;
uint32 op_index = 4; // TE/Custom operator
bool is_flowtable = 5; // Identify whether args is a flowtable structure
bytes args_offset = 6; // args offset information
uint32 args_count = 7; // args count
repeated uint32 origin_op_index = 8;
}


message KernelExDef {
uint32 flags = 1;

uint32 op_index = 4;
uint32 args_size = 12;
bytes args = 13;
bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput
uint32 task_info_size = 15;
bytes kernel_ext_info = 16;
uint32 kernel_ext_info_size = 17;
}


message KernelHcclDef {
uint32 op_index = 8;
string hccl_type = 9;
}


message EventExDef {
uint32 op_index = 1;
uint32 event_type = 2;
}

message LogTimeStampDef {
uint64 logid = 1;
bool notify = 2;
uint32 flat = 3;
}

message MemcpyAsyncDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
uint32 op_index = 6;
}

message StreamSwitchDef {
uint32 op_index = 1;
uint32 true_stream_id = 2;
int64 value = 3;
uint64 value_ptr = 4;
uint32 data_type = 5;
}

message StreamActiveDef {
uint32 op_index = 1;
uint32 active_stream_id = 2;
}

message StreamSwitchNDef {
uint32 op_index = 1;
uint32 size = 2;
repeated int64 target_value = 3;
repeated uint32 true_stream_id = 4;
uint32 element_size = 5;
uint32 data_type = 6;
}

message LabelSetDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelGotoExDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelSwitchByIndexDef {
uint32 op_index = 1;
uint32 label_max = 2;
}

+ 0
- 225
ge/ge_local_engine/CMakeLists.txt

@@ -1,225 +0,0 @@
set(PROTO_LIST
"${METADEF_DIR}/proto/task.proto"
)

set(SRC_LIST
"engine/ge_local_engine.cc"
"ops_kernel_store/ge_local_ops_kernel_info.cc"
"ops_kernel_store/op/op_factory.cc"
"ops_kernel_store/op/op.cc"
"ops_kernel_store/op/ge_deleted_op.cc"
"ops_kernel_store/op/no_op.cc"
)

set(OPS_KERNEL_SRC_LIST
"ops_kernel_store/ge_local_ops_kernel_builder.cc"
"ops_kernel_store/op/op_factory.cc"
"ops_kernel_store/op/op.cc"
"ops_kernel_store/op/ge_deleted_op.cc"
"ops_kernel_store/op/no_op.cc"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

############ libge_local_engine.so ############
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_engine PRIVATE
-Werror
)

target_include_directories(ge_local_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_local_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
graph
protobuf
register
c_sec
slog
runtime
-Wl,--as-needed
)

######### atclib/libge_local_engine.so #############
add_library(atc_ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_ge_local_engine PRIVATE
-Werror
)

target_compile_definitions(atc_ge_local_engine PRIVATE
COMPILE_OMG_PACKAGE
)

target_include_directories(atc_ge_local_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(atc_ge_local_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
graph
protobuf
register
c_sec
slog
runtime_compile
-Wl,--as-needed
)

set_target_properties(atc_ge_local_engine PROPERTIES
OUTPUT_NAME ge_local_engine
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libge_local_opskernel_builder.so ############
add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_opskernel_builder PRIVATE
-Werror
)

target_include_directories(ge_local_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_local_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
slog
register
graph
-Wl,--as-needed
)

############ atclib/libge_local_opskernel_builder.so ############
add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_ge_local_opskernel_builder PRIVATE
-Werror
)

target_include_directories(atc_ge_local_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(atc_ge_local_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
slog
register
graph
-Wl,--as-needed
)

set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
OUTPUT_NAME ge_local_opskernel_builder
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libge_local_opskernel_builder.a ############
add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})

target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror
)

target_include_directories(ge_local_opskernel_builder_static PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(ge_local_opskernel_builder_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
protobuf
c_sec
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS ge_local_engine ge_local_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

install(TARGETS atc_ge_local_engine atc_ge_local_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib
)

+ 0 - 170 ge/ge_local_engine/proto/task.proto

@@ -1,170 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message ModelTaskDef {
string version = 1;

map<string, string> attr = 9; // Extended field
repeated TaskDef task = 10;

uint64 memory_size = 11;
uint32 stream_num = 12;
uint32 event_num = 13;
uint64 weight_size = 14;

repeated bytes op = 15; // input/output opdef in bytes

uint64 base_addr = 16; // base addr
uint64 weight_addr = 17; // weight addr
uint32 batch_num = 18;
}


message TaskDef {
uint32 id = 1;
uint32 type = 2;

uint32 stream_id = 10;
uint32 event_id = 11;

KernelDef kernel = 20;
KernelExDef kernel_ex = 21;
KernelHcclDef kernel_hccl = 25;
EventExDef event_ex = 26;
LogTimeStampDef log_timestamp = 28;

uint32 label_id = 30;

MemcpyAsyncDef memcpy_async = 31;
StreamSwitchDef stream_switch = 32;
StreamActiveDef stream_active = 33;
bytes private_def = 34;
uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future
StreamSwitchNDef stream_switch_n = 36;

LabelSetDef label_set = 37;
LabelGotoExDef label_goto_ex = 38;
LabelSwitchByIndexDef label_switch_by_index = 39;
}

message KernelDef {
KernelContext context = 1;

string stub_func = 10;
uint32 block_dim = 11;
uint32 args_size = 12;
bytes args = 13;
bytes sm_desc = 14;
bytes flowtable = 15;
string so_name = 16;
string kernel_name = 17;
bytes kernel_ext_info = 18;
uint32 kernel_ext_info_size = 19;
}

message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2; // OP type in CCE
uint32 kernel_func_id = 3;
uint32 op_index = 4; // TE/Custom operator
bool is_flowtable = 5; // Identify whether args is a flowtable structure
bytes args_offset = 6; // args offset information
uint32 args_count = 7; // args count
repeated uint32 origin_op_index = 8;
}


message KernelExDef {
uint32 flags = 1;

uint32 op_index = 4;
uint32 args_size = 12;
bytes args = 13;
bytes task_info = 14; // serialized nodeDef, funcDef, input/output
uint32 task_info_size = 15;
bytes kernel_ext_info = 16;
uint32 kernel_ext_info_size = 17;
}


message KernelHcclDef {
uint32 op_index = 8;
string hccl_type = 9;
}


message EventExDef {
uint32 op_index = 1;
uint32 event_type = 2;
}

message LogTimeStampDef {
uint64 logid = 1;
bool notify = 2;
uint32 flat = 3;
}

message MemcpyAsyncDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
uint32 op_index = 6;
}

message StreamSwitchDef {
uint32 op_index = 1;
uint32 true_stream_id = 2;
int64 value = 3;
uint64 value_ptr = 4;
uint32 data_type = 5;
}

message StreamActiveDef {
uint32 op_index = 1;
uint32 active_stream_id = 2;
}

message StreamSwitchNDef {
uint32 op_index = 1;
uint32 size = 2;
repeated int64 target_value = 3;
repeated uint32 true_stream_id = 4;
uint32 element_size = 5;
uint32 data_type = 6;
}

message LabelSetDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelGotoExDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelSwitchByIndexDef {
uint32 op_index = 1;
uint32 label_max = 2;
}
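
The messages above nest as ModelTaskDef -> TaskDef -> KernelDef -> KernelContext. The following minimal C++ sketch shows that nesting, assuming the bindings protoc generates from this task.proto; the header path, type ids, stub symbol, and args blob are hypothetical placeholders, not values from this repository.

#include <cstdint>
#include <string>
#include "proto/task.pb.h"  // hypothetical path to the generated header

domi::ModelTaskDef BuildDemoModelTask() {
  domi::ModelTaskDef model_task;
  model_task.set_version("1.0");  // illustrative version string
  model_task.set_stream_num(1);

  domi::TaskDef *task = model_task.add_task();
  task->set_type(0);       // hypothetical task type id
  task->set_stream_id(0);

  domi::KernelDef *kernel = task->mutable_kernel();
  kernel->mutable_context()->set_kernel_type(2);  // hypothetical kernel type
  kernel->set_stub_func("demo_stub");             // hypothetical stub symbol
  kernel->set_block_dim(1);
  std::string args(64, '\0');                     // placeholder args blob
  kernel->set_args(args);
  kernel->set_args_size(static_cast<uint32_t>(args.size()));
  return model_task;
}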

+ 0 - 65 ge/ge_runtime/CMakeLists.txt

@@ -1,65 +0,0 @@
# Copyright 2019-2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
############ libge_runtime.so ############
# set() does not expand wildcards; collect task/*.cc with file(GLOB) instead
file(GLOB GE_TASK_SRC_LIST "task/*.cc")
set(GE_SRC_LIST
"model_runner.cc"
"runtime_model.cc"
"output.cc"
${GE_TASK_SRC_LIST}
)

add_library(ge_runtime SHARED ${GE_SRC_LIST})

target_compile_options(ge_runtime PRIVATE
-Werror
-O2
)

target_compile_definitions(ge_runtime PUBLIC
PROTOBUF_INLINE_NOT_IN_HEADERS=0
)

target_include_directories(ge_runtime PRIVATE
${TOP_DIR}
${TOP_DIR}/inc
${TOP_DIR}/inc/graph
${TOP_DIR}/inc/external
${TOP_DIR}/inc/framework
${TOP_DIR}/inc/framework/common
${TOP_DIR}/inc/framework/ge_runtime
${TOP_DIR}/inc/cce
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
)

target_link_libraries(ge_runtime PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
graph
slog
runtime
c_sec
-Wl,--as-needed
-lrt
-ldl
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS ge_runtime OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

+ 0 - 66 ge/ge_runtime/module.mk

@@ -1,66 +0,0 @@
LOCAL_PATH := $(call my-dir)

# task.proto is the legacy task definition; it is added for ops_kernel_info_store
local_ge_runtime_src_files := \
model_runner.cc \
runtime_model.cc \
output.cc \
task/aicpu_task.cc \
task/cce_task.cc \
task/tbe_task.cc \
task/event_record_task.cc \
task/event_wait_task.cc \
task/stream_active_task.cc \
task/stream_switch_task.cc \
task/hccl_task.cc \
task/memcpy_async_task.cc \
task/profiler_task.cc \

local_ge_runtime_include := \
$(LOCAL_PATH)/ \
$(TOPDIR)libc_sec/include \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/graph \
$(TOPDIR)inc \
$(LOCAL_PATH)/../ \
third_party/protobuf/include

local_ge_runtime_shared_library := \
libruntime \
libslog \
libc_sec

local_ge_runtime_ldflags := -lrt -ldl

# compile device libge_runtime
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runtime
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
LOCAL_CFLAGS += -Werror
LOCAL_SRC_FILES := $(local_ge_runtime_src_files)
LOCAL_C_INCLUDES := $(local_ge_runtime_include)
LOCAL_SHARED_LIBRARIES := $(local_ge_runtime_shared_library)
LOCAL_LDFLAGS += $(local_ge_runtime_ldflags)

include $(BUILD_SHARED_LIBRARY)

# compile host libge_runtime
include $(CLEAR_VARS)

LOCAL_MODULE := libge_runtime
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
LOCAL_CFLAGS += -O2
endif
LOCAL_SRC_FILES := $(local_ge_runtime_src_files)
LOCAL_C_INCLUDES := $(local_ge_runtime_include)
LOCAL_SHARED_LIBRARIES := $(local_ge_runtime_shared_library)
LOCAL_LDFLAGS += $(local_ge_runtime_ldflags)

include $(BUILD_HOST_SHARED_LIBRARY)

+ 0 - 38 ge/graph/build/memory/CMakeLists.txt

@@ -1,38 +0,0 @@
set(SRC_LIST
"memory_assigner.cc"
"graph_mem_assigner.cc"
"binary_block_mem_assigner.cc"
"block_mem_assigner.cc"
"hybrid_mem_assigner.cc"
"max_block_mem_assigner.cc"
"var_mem_assign_util.cc"
)

############ libge_memory.a ############
add_library(ge_memory STATIC ${SRC_LIST})

target_compile_options(ge_memory PRIVATE
-Werror
-O2
)

target_link_libraries(ge_memory PRIVATE
$<BUILD_INTERFACE:intf_pub>
protobuf
c_sec
)

target_include_directories(ge_memory PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${GE_CODE_DIR}/inc/framework
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

+ 0 - 214 ge/host_cpu_engine/CMakeLists.txt

@@ -1,214 +0,0 @@
set(PROTO_LIST
"${METADEF_DIR}/proto/task.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRC_LIST
"engine/host_cpu_engine.cc"
"ops_kernel_store/host_cpu_ops_kernel_info.cc"
"ops_kernel_store/op/op_factory.cc"
"ops_kernel_store/op/host_op.cc"
)

set(CPU_OPS_KERNEL_LIST
"ops_kernel_store/host_cpu_ops_kernel_builder.cc"
)

############ libhost_cpu_engine.so ############
add_library(host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(host_cpu_engine PRIVATE
-Werror
)

target_include_directories(host_cpu_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
graph
register
slog
runtime
-Wl,--as-needed
)

############ atcstub/libhost_cpu_engine.so ############
add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc_host_cpu_engine PRIVATE
-Werror
)

target_compile_definitions(atc_host_cpu_engine PRIVATE
COMPILE_OMG_PACKAGE
)

target_include_directories(atc_host_cpu_engine PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(atc_host_cpu_engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
graph
register
slog
runtime_compile
-Wl,--as-needed
)

set_target_properties(atc_host_cpu_engine PROPERTIES
OUTPUT_NAME host_cpu_engine
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libhost_cpu_opskernel_builder.so ############
add_library(host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

target_compile_options(host_cpu_opskernel_builder PRIVATE
-Werror
)

target_include_directories(host_cpu_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
slog
graph
register
-Wl,--as-needed
)

############ atclib/libhost_cpu_opskernel_builder.so ############
add_library(atc_host_cpu_opskernel_builder SHARED ${CPU_OPS_KERNEL_LIST})

target_compile_options(atc_host_cpu_opskernel_builder PRIVATE
-Werror
)

target_include_directories(atc_host_cpu_opskernel_builder PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
protobuf
c_sec
slog
graph
register
-Wl,--as-needed
)

set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
OUTPUT_NAME host_cpu_opskernel_builder
LIBRARY_OUTPUT_DIRECTORY atclib
)

############ libhost_cpu_opskernel_builder.a ############
add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})

target_compile_options(host_cpu_opskernel_builder_static PRIVATE
-Werror
)

target_include_directories(host_cpu_opskernel_builder_static PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(host_cpu_opskernel_builder_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
protobuf
c_sec
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS host_cpu_engine host_cpu_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

install(TARGETS atc_host_cpu_engine atc_host_cpu_opskernel_builder OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}/atclib
)

+ 0 - 1 ge/host_cpu_engine/proto/task.proto

@@ -1 +0,0 @@
../../proto/task.proto

+ 0 - 81 ge/offline/CMakeLists.txt

@@ -1,81 +0,0 @@
set(PROTO_LIST
"${METADEF_DIR}/proto/om.proto"
"${METADEF_DIR}/proto/ge_ir.proto"
"${METADEF_DIR}/proto/insert_op.proto"
"${METADEF_DIR}/proto/task.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})

set(SRC_LIST
"main.cc"
"single_op_parser.cc"
"../session/omg.cc"
"../ir_build/atc_ir_common.cc"
)

############ atc ############
add_executable(atc ${SRC_LIST} ${PROTO_HDRS})

target_compile_options(atc PRIVATE
-Werror
-O2
)

target_compile_definitions(atc PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
COMPILE_OMG_PACKAGE
)

target_include_directories(atc PRIVATE
${CMAKE_CURRENT_LIST_DIR}
${GE_CODE_DIR}
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc/external
${GE_CODE_DIR}/common/inc/external
${GE_CODE_DIR}/common/inc/external/graph
${GE_CODE_DIR}/inc
${GE_CODE_DIR}/inc/framework
${METADEF_DIR}/inc
${METADEF_DIR}/inc/graph
${METADEF_DIR}/inc/register
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/external/register
${PARSER_DIR}
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/common
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)

target_link_libraries(atc PRIVATE
$<BUILD_INTERFACE:intf_pub>
protobuf
ge_common
register
c_sec
graph
error_manager
ge_compiler
parser_common
gflags
json
runtime_compile
slog
mmpa
-lrt
-ldl
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS atc OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

+ 0 - 1334 ge/offline/main.cc (file diff suppressed because it is too large)


+ 0 - 52 ge/offline/module.mk

@@ -1,52 +0,0 @@

LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := atc

LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2

LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \

LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/../ ./ \
$(TOPDIR)inc \
$(TOPDIR)inc/external \
$(TOPDIR)inc/external/graph \
$(TOPDIR)inc/framework \
$(TOPDIR)inc/framework/domi \
$(TOPDIR)libc_sec/include \
$(TOPDIR)inc/common/util \
third_party/json/include \
third_party/gflags/include \
third_party/protobuf/include \
proto/om.proto \
proto/ge_ir.proto \
proto/task.proto \
proto/insert_op.proto \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libge_common \
libprotobuf \
libslog \
libgraph \
libregister \
liberror_manager \
libge_compiler \
libruntime_compile \
libparser_common \

LOCAL_STATIC_LIBRARIES := libgflags

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_EXECUTABLE)


+ 0 - 1 ge/offline/proto/ge_ir.proto

@@ -1 +0,0 @@
../../../../inc/common/proto/ge_ir.proto

+ 0 - 1 ge/offline/proto/insert_op.proto

@@ -1 +0,0 @@
../../../../inc/common/proto/insert_op.proto

+ 0 - 1 ge/offline/proto/om.proto

@@ -1 +0,0 @@
../../../../inc/common/proto/om.proto

+ 0 - 1 ge/offline/proto/task.proto

@@ -1 +0,0 @@
../../proto/task.proto

+ 0 - 503 ge/offline/single_op_parser.cc

@@ -1,503 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "single_op_parser.h"

#include <vector>
#include <algorithm>
#include <fstream>
#include <sstream>

#include <nlohmann/json.hpp>

#include "framework/common/debug/ge_log.h"
#include "common/util/error_manager/error_manager.h"
#include "common/ge_inner_error_codes.h"
#include "framework/common/util.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/operator_factory_impl.h"

using Json = nlohmann::json;
using std::string;
using std::vector;
using std::map;

namespace ge {
namespace {
constexpr char const *kKeyOp = "op";
constexpr char const *kKeyInputDesc = "input_desc";
constexpr char const *kKeyOutputDesc = "output_desc";
constexpr char const *kKeyAttr = "attr";
constexpr char const *kKeyName = "name";
constexpr char const *kKeyType = "type";
constexpr char const *kKeyShape = "shape";
constexpr char const *kKeyShapeRange = "shape_range";
constexpr char const *kKeyValue = "value";
constexpr char const *kKeyFormat = "format";
constexpr char const *kFileSuffix = ".om";
constexpr int kDumpJsonIndent = 2;
constexpr int kShapeRangePairSize = 2;
constexpr int kShapeRangeLow = 0;
constexpr int kShapeRangeHigh = 1;

map<string, GeAttrValue::ValueType> kAttrTypeDict = {
{"bool", GeAttrValue::VT_BOOL},
{"int", GeAttrValue::VT_INT},
{"float", GeAttrValue::VT_FLOAT},
{"string", GeAttrValue::VT_STRING},
{"list_bool", GeAttrValue::VT_LIST_BOOL},
{"list_int", GeAttrValue::VT_LIST_INT},
{"list_float", GeAttrValue::VT_LIST_FLOAT},
{"list_string", GeAttrValue::VT_LIST_STRING},
{"list_list_int", GeAttrValue::VT_LIST_LIST_INT},
{"data_type", GeAttrValue::VT_DATA_TYPE},
};

map<string, DataType> kDataTypeDict = {
{"bool", DT_BOOL},
{"int8", DT_INT8},
{"uint8", DT_UINT8},
{"int16", DT_INT16},
{"uint16", DT_UINT16},
{"int32", DT_INT32},
{"uint32", DT_UINT32},
{"int64", DT_INT64},
{"uint64", DT_UINT64},
{"float16", DT_FLOAT16},
{"half", DT_FLOAT16},
{"fp16", DT_FLOAT16},
{"float", DT_FLOAT},
{"float32", DT_FLOAT},
{"double", DT_DOUBLE},
};

map<string, Format> kFormatDict = {
{"nchw", FORMAT_NCHW},
{"nhwc", FORMAT_NHWC},
{"nd", FORMAT_ND},
{"fractal_nz", FORMAT_FRACTAL_NZ},
{"fractal_z", FORMAT_FRACTAL_Z},
{"nc1hwc0", FORMAT_NC1HWC0},
};
}

template<typename T>
void SetAttrValue(const Json &j, SingleOpAttr &attr) {
attr.value.SetValue<T>(j.at(kKeyValue).get<T>());
}

template<typename T>
T GetValue(const map<string, T> &dict, string &key, T default_val) {
transform(key.begin(), key.end(), key.begin(), ::tolower);
auto it = dict.find(key);
if (it == dict.end()) {
return default_val;
}

return it->second;
}

void from_json(const Json &j, SingleOpTensorDesc &desc) {
desc.dims = j.at(kKeyShape).get<vector<int64_t>>();
auto it = j.find(kKeyShapeRange);
if (it != j.end()) {
desc.dim_ranges = j.at(kKeyShapeRange).get<vector<std::vector<int64_t>>>();
}
string format_str = j.at(kKeyFormat).get<string>();
string type_str = j.at(kKeyType).get<string>();
desc.format = GetValue(kFormatDict, format_str, FORMAT_RESERVED);
desc.type = GetValue(kDataTypeDict, type_str, DT_UNDEFINED);
auto tensor_name = j.find(kKeyName);
if (tensor_name != j.end()) {
desc.name = tensor_name->get<string>();
}
}

void from_json(const Json &j, SingleOpAttr &attr) {
attr.name = j.at(kKeyName).get<string>();
attr.type = j.at(kKeyType).get<string>();
auto it = kAttrTypeDict.find(attr.type);
if (it == kAttrTypeDict.end()) {
GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str());
return;
}

switch (it->second) {
case GeAttrValue::VT_BOOL:
SetAttrValue<bool>(j, attr);
break;
case GeAttrValue::VT_INT:
SetAttrValue<int64_t>(j, attr);
break;
case GeAttrValue::VT_FLOAT:
SetAttrValue<float>(j, attr);
break;
case GeAttrValue::VT_STRING:
SetAttrValue<string>(j, attr);
break;
case GeAttrValue::VT_LIST_BOOL:
SetAttrValue<vector<bool>>(j, attr);
break;
case GeAttrValue::VT_LIST_INT:
SetAttrValue<vector<int64_t>>(j, attr);
break;
case GeAttrValue::VT_LIST_FLOAT:
SetAttrValue<vector<float>>(j, attr);
break;
case GeAttrValue::VT_LIST_STRING:
SetAttrValue<vector<string>>(j, attr);
break;
case GeAttrValue::VT_LIST_LIST_INT:
SetAttrValue<vector<vector<int64_t>>>(j, attr);
break;
case GeAttrValue::VT_DATA_TYPE:
SetAttrValue<DataType>(j, attr);
break;
default:
GELOGE(UNSUPPORTED, "Parse attr[%s] failed. Unsupported type: %s", attr.name.c_str(), attr.type.c_str());
break;
}
}

void from_json(const Json &j, SingleOpDesc &desc) {
desc.op = j.at(kKeyOp).get<string>();

auto input_desc = j.find(kKeyInputDesc);
if (input_desc != j.end()) {
desc.input_desc = input_desc->get<vector<SingleOpTensorDesc>>();
}

auto output_desc = j.find(kKeyOutputDesc);
if (output_desc != j.end()) {
desc.output_desc = output_desc->get<vector<SingleOpTensorDesc>>();
}

auto attr_field = j.find(kKeyAttr);
if (attr_field != j.end()) {
desc.attrs = attr_field->get<vector<SingleOpAttr>>();
}
}

Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
std::string real_path = RealPath(file.c_str());
if (real_path.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10023", {"value"}, {file});
GELOGE(FAILED, "Input parameter[--singleop]'s value[%s] is not a valid path.", file.c_str());
return INTERNAL_ERROR;
}

std::ifstream ifs(real_path);
if (!ifs.is_open()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10024", {"value"}, {file});
GELOGE(FAILED, "Open file[%s] provided in input parameter[--singleop] failed.", file.c_str());
return FAILED;
}
try {
ifs >> json_obj;
} catch (const std::exception &e) {
ErrorManager::GetInstance().ATCReportErrMessage("E10025", {"realpath", "errmsg"}, {real_path, e.what()});
GELOGE(PARAM_INVALID, "Parse file[%s] provided in input parameter[--singleop] failed, exception = %s.",
real_path.c_str(), e.what());
return PARAM_INVALID;
}

ifs.close();
return SUCCESS;
}

bool SingleOpParser::Validate(const SingleOpDesc &op_desc) {
if (op_desc.op.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10026");
GELOGE(PARAM_INVALID, "Op name is empty");
return false;
}

int index = 0;
for (auto &tensor_desc : op_desc.input_desc) {
if (tensor_desc.type == DT_UNDEFINED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"input", std::to_string(index)});
GELOGE(PARAM_INVALID, "Input's dataType is invalid when the index is %d", index);
return false;
}

if (tensor_desc.format == FORMAT_RESERVED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"input", std::to_string(index)});
GELOGE(PARAM_INVALID, "Input's format is invalid when the index is %d", index);
return false;
}
++index;
}

index = 0;
for (auto &tensor_desc : op_desc.output_desc) {
if (tensor_desc.type == DT_UNDEFINED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10027", {"input", "index"}, {"output", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's dataType is invalid when the index is %d", index);
return false;
}

if (tensor_desc.format == FORMAT_RESERVED) {
ErrorManager::GetInstance().ATCReportErrMessage("E10028", {"input", "index"}, {"output", std::to_string(index)});
GELOGE(PARAM_INVALID, "Output's format is invalid when the index is %d", index);
return false;
}
++index;
}

for (auto &attr : op_desc.attrs) {
if (attr.name.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10029");
GELOGE(PARAM_INVALID, "attr name is empty");
return false;
}

if (attr.value.IsEmpty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E10030", {"attrname"}, {attr.name});
GELOGE(PARAM_INVALID, "Parse attr \"%s\" failed. ", attr.name.c_str());
return false;
}
}

return true;
}

std::unique_ptr<OpDesc> SingleOpParser::CreateOpDesc(const string &op_type) {
return std::unique_ptr<OpDesc>(new(std::nothrow) OpDesc(op_type, op_type));
}

Status SingleOpParser::ConvertToBuildParam(int index,
const SingleOpDesc &single_op_desc,
SingleOpBuildParam &build_param) {
auto op_desc = CreateOpDesc(single_op_desc.op);
if (op_desc == nullptr) {
GELOGE(MEMALLOC_FAILED, "Failed to create instance of opDesc");
return MEMALLOC_FAILED;
}

std::stringstream file_name;
file_name << index;
file_name << "_" << single_op_desc.op;
for (auto &desc : single_op_desc.input_desc) {
file_name << "_" << desc.type << "_" << desc.format;
for (auto dim : desc.dims) {
file_name << "_" << dim;
}
GeTensorDesc ge_tensor_desc(GeShape(desc.dims),
desc.format,
desc.type);
ge_tensor_desc.SetOriginFormat(desc.format);
GE_CHK_STATUS_RET_NOLOG(SetShapeRange(op_desc->GetName(), desc, ge_tensor_desc));
TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size());
TensorUtils::SetInputTensor(ge_tensor_desc, true);
TensorUtils::SetOutputTensor(ge_tensor_desc, false);
if (desc.name.empty()) {
op_desc->AddInputDesc(ge_tensor_desc);
} else {
op_desc->AddInputDesc(desc.name, ge_tensor_desc);
}
build_param.inputs.emplace_back(ge_tensor_desc);
}

for (auto &desc : single_op_desc.output_desc) {
file_name << "_" << desc.type << "_" << desc.format;
for (auto dim : desc.dims) {
file_name << "_" << dim;
}

GeTensorDesc ge_tensor_desc(GeShape(desc.dims),
desc.format,
desc.type);
ge_tensor_desc.SetOriginFormat(desc.format);
GE_CHK_STATUS_RET_NOLOG(SetShapeRange(op_desc->GetName(), desc, ge_tensor_desc));
TensorUtils::SetRealDimCnt(ge_tensor_desc, desc.dims.size());
TensorUtils::SetInputTensor(ge_tensor_desc, false);
TensorUtils::SetOutputTensor(ge_tensor_desc, true);
if (desc.name.empty()) {
op_desc->AddOutputDesc(ge_tensor_desc);
} else {
op_desc->AddOutputDesc(desc.name, ge_tensor_desc);
}
build_param.outputs.emplace_back(ge_tensor_desc);
}

for (const auto &attr : single_op_desc.attrs) {
op_desc->SetAttr(attr.name, attr.value);
}

if (VerifyOpInputOutputSizeByIr(*op_desc) != SUCCESS) {
GELOGE(PARAM_INVALID, "Verify op [%s] input or output size failed.", op_desc->GetType().c_str());
return PARAM_INVALID;
}

file_name << kFileSuffix;
build_param.file_name = file_name.str();
build_param.op_desc.reset(op_desc.release());
return SUCCESS;
}

Status SingleOpParser::VerifyOpInputOutputSizeByIr(const OpDesc &current_op_desc) {
ge::Operator operator_ir = ge::OperatorFactory::CreateOperator("tmp_operator", current_op_desc.GetType());
if (!operator_ir.IsEmpty()) {
auto opdesc_ir = ge::OpDescUtils::GetOpDescFromOperator(operator_ir);
GE_CHECK_NOTNULL(opdesc_ir);
size_t current_opdesc_inputs_num = current_op_desc.GetInputsSize();
size_t ir_opdesc_inputs_num = opdesc_ir->GetInputsSize();
if (current_opdesc_inputs_num < ir_opdesc_inputs_num) {
string reason = "is smaller than the ir needed input size " + std::to_string(ir_opdesc_inputs_num);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{current_op_desc.GetName(), "input size " + std::to_string(current_opdesc_inputs_num), reason});
GELOGE(PARAM_INVALID, "This op [%s] input size %zu is smaller than the ir needed input size %zu",
current_op_desc.GetName().c_str(), current_opdesc_inputs_num, ir_opdesc_inputs_num);
return PARAM_INVALID;
}
size_t current_opdesc_outputs_num = current_op_desc.GetOutputsSize();
size_t ir_opdesc_outputs_num = opdesc_ir->GetOutputsSize();
if (current_opdesc_outputs_num < ir_opdesc_outputs_num) {
string reason = "is smaller than the ir needed output size " + std::to_string(ir_opdesc_outputs_num);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{current_op_desc.GetName(), "output size " + std::to_string(current_opdesc_outputs_num), reason});
GELOGE(PARAM_INVALID, "This op [%s] output size %zu is smaller than the ir needed output size %zu",
current_op_desc.GetName().c_str(), current_opdesc_outputs_num, ir_opdesc_outputs_num);
return PARAM_INVALID;
}
}
return SUCCESS;
}

Status SingleOpParser::SetShapeRange(const std::string &op_name,
const SingleOpTensorDesc &tensor_desc,
GeTensorDesc &ge_tensor_desc) {
auto num_shape_ranges = tensor_desc.dim_ranges.size();
GELOGD("Number of shape ranges = %zu", num_shape_ranges);
auto it = std::find(tensor_desc.dims.begin(), tensor_desc.dims.end(), ge::UNKNOWN_DIM_NUM);
if (it != tensor_desc.dims.end()) {
if (tensor_desc.dims != ge::UNKNOWN_RANK) {
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{op_name,
"shape",
"has unknown rank but dim size is not one"});
GELOGE(PARAM_INVALID, "Invalid tensor shape: [%s]", ge_tensor_desc.MutableShape().ToString().c_str());
return PARAM_INVALID;
}
if (!tensor_desc.dim_ranges.empty()) {
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{op_name,
"shape range",
"is not needed while the rank the shape is unknown"});
GELOGE(PARAM_INVALID, "Shape range is not needed while the rank the shape is unknown");
return PARAM_INVALID;
}

GELOGD("Shape is unknown rank, do not set shape range");
return SUCCESS;
}

std::vector<std::pair<int64_t, int64_t>> shape_range;
size_t range_index = 0;
for (auto dim : tensor_desc.dims) {
if (dim >= 0) {
shape_range.emplace_back(dim, dim);
GELOGD("Adding shape range: [%ld, %ld]", dim, dim);
} else {
GELOGD("To get shape range by index = %zu", range_index);
if (range_index >= num_shape_ranges) {
string reason = "is smaller than the unknown dim size " + std::to_string(++range_index);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{op_name,
"shape range size " + std::to_string(num_shape_ranges),
reason});
GELOGE(PARAM_INVALID, "The number of shape_range mismatches that of unknown dims.");
return PARAM_INVALID;
}

auto &range = tensor_desc.dim_ranges[range_index];
if (range.size() != kShapeRangePairSize) {
string reason = "has " + std::to_string(range.size()) + " item(s)";
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{op_name,
"shape range " + std::to_string(range_index),
reason});
GELOGE(PARAM_INVALID, "Invalid shape range entry. index = %zu, size = %zu", range_index, range.size());
return PARAM_INVALID;
}

shape_range.emplace_back(range[kShapeRangeLow], range[kShapeRangeHigh]);
GELOGD("Adding shape range: [%ld, %ld]", range[kShapeRangeLow], range[kShapeRangeHigh]);
++range_index;
}
}

if (num_shape_ranges != range_index) {
string reason = "is greater than the unknown dim size " + std::to_string(range_index);
ErrorManager::GetInstance().ATCReportErrMessage("E19014", {"opname", "value", "reason"},
{op_name,
"shape range size " + std::to_string(num_shape_ranges),
reason});
GELOGE(PARAM_INVALID,
"The number of shape_range(%zu) mismatches that of unknown dims(%zu).",
num_shape_ranges,
range_index);
return PARAM_INVALID;
}

if (range_index > 0) {
ge_tensor_desc.SetShapeRange(shape_range);
}

return SUCCESS;
}

Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<SingleOpBuildParam> &op_list) {
Json single_op_list_json;
auto ret = ReadJsonFile(file, single_op_list_json);
if (ret != SUCCESS) {
return ret;
}

int index = 0;
for (const Json &single_op_json : single_op_list_json) {
SingleOpDesc single_op_desc;
try {
GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
single_op_desc = single_op_json;
} catch (const nlohmann::json::exception &e) {
ErrorManager::GetInstance().ATCReportErrMessage("E10032", {"index", "jsonfile", "exception"},
{std::to_string(index), file, e.what()});
GELOGE(PARAM_INVALID, "Parse the index[%d] of op failed when read json file[%s], exception %s",
index, file.c_str(), e.what());
return PARAM_INVALID;
}

if (!Validate(single_op_desc)) {
GELOGE(PARAM_INVALID, "Validate the index[%d] of op failed when read json file[%s].", index, file.c_str());
return PARAM_INVALID;
}

SingleOpBuildParam param;
ret = ConvertToBuildParam(index, single_op_desc, param);
if (ret != SUCCESS) {
return ret;
}

op_list.emplace_back(param);
GELOGI("Parse the index[%d] of op success", index);
index += 1;
}

return SUCCESS;
}
} // namespace ge
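
For context, a minimal sketch of driving this parser, assuming the headers above are on the include path; the file name add_op.json and its contents are hypothetical, inferred from the JSON keys the from_json overloads handle (op, input_desc, output_desc, format, shape, type).

// add_op.json (hypothetical contents):
// [{
//   "op": "Add",
//   "input_desc": [
//     {"format": "ND", "shape": [16, 16], "type": "float16"},
//     {"format": "ND", "shape": [16, 16], "type": "float16"}
//   ],
//   "output_desc": [{"format": "ND", "shape": [16, 16], "type": "float16"}]
// }]
#include <vector>
#include "single_op_parser.h"

ge::Status ParseDemo() {
  std::vector<ge::SingleOpBuildParam> op_list;
  // On success, op_list holds one SingleOpBuildParam whose file_name ends in ".om".
  return ge::SingleOpParser::ParseSingleOpList("add_op.json", op_list);
}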


+ 0 - 78 ge/offline/single_op_parser.h

@@ -1,78 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ACL_TOOLS_COMPILE_PARSER_H
#define ACL_TOOLS_COMPILE_PARSER_H

#include <vector>
#include <string>

#include <nlohmann/json.hpp>

#include "ge/ge_api_error_codes.h"
#include "graph/types.h"
#include "graph/ge_attr_value.h"
#include "graph/op_desc.h"

namespace ge {
struct SingleOpTensorDesc {
std::string name;
std::vector<int64_t> dims;
std::vector<std::vector<int64_t>> dim_ranges;
ge::Format format = ge::FORMAT_RESERVED;
ge::DataType type = ge::DT_UNDEFINED;
};

struct SingleOpAttr {
std::string name;
std::string type;
ge::GeAttrValue value;
};

struct SingleOpDesc {
std::string op;
std::vector<SingleOpTensorDesc> input_desc;
std::vector<SingleOpTensorDesc> output_desc;
std::vector<SingleOpAttr> attrs;
};

struct SingleOpBuildParam {
ge::OpDescPtr op_desc;
std::vector<ge::GeTensor> inputs;
std::vector<ge::GeTensor> outputs;
std::string file_name;
};

void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc);

void from_json(const nlohmann::json &json, SingleOpAttr &desc);

void from_json(const nlohmann::json &json, SingleOpDesc &desc);

class SingleOpParser {
public:
static Status ParseSingleOpList(const std::string &file, std::vector<SingleOpBuildParam> &op_list);

private:
static Status ReadJsonFile(const std::string &file, nlohmann::json &json_obj);
static bool Validate(const SingleOpDesc &op_desc);
static std::unique_ptr<OpDesc> CreateOpDesc(const std::string &op_type);
static Status ConvertToBuildParam(int index, const SingleOpDesc &single_op_desc, SingleOpBuildParam &build_param);
static Status VerifyOpInputOutputSizeByIr(const OpDesc &current_op_desc);
static Status SetShapeRange(const std::string &op_name, const SingleOpTensorDesc &tensor_desc, GeTensorDesc &ge_tensor_desc);
};
} // namespace ge

#endif // ACL_TOOLS_COMPILE_PARSER_H

+ 0 - 49 ge/plugin/engine/CMakeLists.txt

@@ -1,49 +0,0 @@
set(SRC_LIST
"dnnengines.cc"
"engine_manage.cc"
)

############ libengine.so ############
add_library(engine SHARED ${SRC_LIST})

target_compile_options(engine PRIVATE
-Werror
)

target_compile_definitions(engine PRIVATE
REUSE_MEMORY=1
PROTOBUF_INLINE_NOT_IN_HEADERS=0
)

target_include_directories(engine PRIVATE
${GE_CODE_DIR}/ge
${GE_CODE_DIR}/inc/
${GE_CODE_DIR}/inc/framework
${GE_CODE_DIR}/inc/framework/common
${GE_CODE_DIR}/inc/external
${METADEF_DIR}/inc
${METADEF_DIR}/inc/external
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
${GE_CODE_DIR}/third_party/fwkacllib/inc
)

target_link_libraries(engine PRIVATE
$<BUILD_INTERFACE:intf_pub>
-Wl,--no-as-needed
slog
-Wl,--as-needed
-lrt
-ldl
)

############ install ############
set(INSTALL_BASE_DIR "")
set(INSTALL_LIBRARY_DIR lib)

install(TARGETS engine OPTIONAL
LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
)

+ 0 - 1821 ge/proto/caffe/caffe.proto (file diff suppressed because it is too large)


+ 0 - 127 ge/proto/dump_task.proto

@@ -1,127 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";
package toolkit.dumpdata;

enum OutputDataType {
DT_UNDEFINED = 0;
DT_FLOAT = 1;
DT_FLOAT16 = 2;
DT_INT8 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_UINT16 = 6;
DT_INT32 = 7;
DT_INT64 = 8;
DT_UINT32 = 9;
DT_UINT64 = 10;
DT_BOOL = 11;
DT_DOUBLE = 12;
DT_STRING = 13;
DT_DUAL_SUB_INT8 = 14;
DT_DUAL_SUB_UINT8 = 15;
DT_COMPLEX64 = 16;
DT_COMPLEX128 = 17;
DT_QINT8 = 18;
DT_QINT16 = 19;
DT_QINT32 = 20;
DT_QUINT8 = 21;
DT_QUINT16 = 22;
DT_RESOURCE = 23;
DT_STRING_REF = 24;
DT_DUAL = 25;
}

enum OutputFormat {
FORMAT_NCHW = 0;
FORMAT_NHWC = 1;
FORMAT_ND = 2;
FORMAT_NC1HWC0 = 3;
FORMAT_FRACTAL_Z = 4;
FORMAT_NC1C0HWPAD = 5;
FORMAT_NHWC1C0 = 6;
FORMAT_FSR_NCHW = 7;
FORMAT_FRACTAL_DECONV = 8;
FORMAT_C1HWNC0 = 9;
FORMAT_FRACTAL_DECONV_TRANSPOSE = 10;
FORMAT_FRACTAL_DECONV_SP_STRIDE_TRANS = 11;
FORMAT_NC1HWC0_C04 = 12;
FORMAT_FRACTAL_Z_C04 = 13;
FORMAT_CHWN = 14;
FORMAT_FRACTAL_DECONV_SP_STRIDE8_TRANS = 15;
FORMAT_HWCN = 16;
FORMAT_NC1KHKWHWC0 = 17;
FORMAT_BN_WEIGHT = 18;
FORMAT_FILTER_HWCK = 19;
FORMAT_HASHTABLE_LOOKUP_LOOKUPS = 20;
FORMAT_HASHTABLE_LOOKUP_KEYS = 21;
FORMAT_HASHTABLE_LOOKUP_VALUE = 22;
FORMAT_HASHTABLE_LOOKUP_OUTPUT = 23;
FORMAT_HASHTABLE_LOOKUP_HITS = 24;
FORMAT_C1HWNCoC0 = 25;
FORMAT_MD = 26;
FORMAT_NDHWC = 27;
FORMAT_FRACTAL_ZZ = 28;
FORMAT_FRACTAL_NZ = 29;
FORMAT_RESERVED = 30;
}

message OriginalOp {
string name = 1;
uint32 output_index = 2;
OutputDataType data_type = 3;
OutputFormat format = 4;
}

message Shape {
repeated uint64 dim = 1;
}

message OpOutput {
OutputDataType data_type = 1;
OutputFormat format = 2;
Shape shape = 3;
OriginalOp original_op = 4; // the original op corresponding to the output
bytes data = 5;
uint64 size = 6;
}

message OpInput {
OutputDataType data_type = 1;
OutputFormat format = 2;
Shape shape = 3;
bytes data = 4;
uint64 size = 5;
}

enum BufferType {
L1 = 0;
}

message OpBuffer {
BufferType buffer_type = 1;
bytes data = 2;
uint64 size = 3;
}

message DumpData {
string version = 1;
uint64 dump_time = 2;
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
}
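
A minimal sketch of reading one of these serialized dump blobs back, assuming the protoc-generated C++ bindings for this file; the generated header name and the blob path are hypothetical.

#include <cstdio>
#include <fstream>
#include <string>
#include "dump_task.pb.h"  // hypothetical generated header name

bool PrintDumpSummary(const std::string &path) {
  std::ifstream in(path, std::ios::binary);
  toolkit::dumpdata::DumpData dump;
  if (!in.is_open() || !dump.ParseFromIstream(&in)) {
    return false;  // unreadable file or not a serialized DumpData
  }
  std::printf("version=%s outputs=%d inputs=%d buffers=%d\n",
              dump.version().c_str(), dump.output_size(), dump.input_size(),
              dump.buffer_size());
  return true;
}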

+ 0 - 104 ge/proto/ge_api.proto

@@ -1,104 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";
package ge.api_pb;

import "ge_ir.proto";

// GE initialize
message GEInitialize {
map<string, string> options = 1;
};

// initialize response
message GEInitializeResponse {
uint32 status = 1;
uint32 clientId = 2;
};

// GE finalize
message GEFinalize {
bool final = 1;
uint32 clientId = 2;
};

message GEFinalizeResponse {
uint32 status = 1;
};

// GE Session
message CreateSession {
map<string, string> options = 1;
};

message CreateSessionResponse {
uint32 status = 1;
uint64 sessionId = 2;
};

// GE AddGraph
// model serialization: serialized graph
message SessionAddGraph {
uint32 graphId = 1;
uint64 sessionId = 2;
ge.proto.GraphDef graph = 3;
};

message SessionAddGraphResponse {
uint32 status = 1;
};

// GE SessionRemoveGraph
message SessionRemoveGraph {
uint32 graphId = 1;
uint64 sessionId = 2;
};

message SessionRemoveGraphResponse {
uint32 status = 1;
};

message SessionRunGraph {
uint32 graphId = 1;
uint64 sessionId = 2;
repeated ge.proto.TensorDef tensor = 3;
};

message SessionBuildGraph {
uint32 graphId = 1;
uint64 sessionId = 2;
repeated ge.proto.TensorDef tensor = 3;
string savePath = 4;
};

message SessionRunGraphResponse {
uint32 status = 1;
repeated ge.proto.TensorDef tensor = 2;
};

message SessionBuildGraphResponse {
uint32 status = 1;
};

message DestroySession {
bool final = 1;
uint64 sessionId = 2;
};

message DestroySessionResponse {
uint32 status = 1;
};

+ 0 - 206 ge/proto/ge_ir.proto

@@ -1,206 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package ge.proto;

enum DataType
{
DT_UNDEFINED = 0; // Used to indicate a DataType field has not been set.
DT_FLOAT = 1; // float type
DT_FLOAT16 = 2; // fp16 type
DT_INT8 = 3; // int8 type
DT_UINT8 = 4; // uint8 type
DT_INT16 = 5; // int16 type
DT_UINT16 = 6; // uint16 type
DT_INT32 = 7; // int32 type
DT_INT64 = 8; // int64 type
DT_UINT32 = 9; // unsigned int32
DT_UINT64 = 10; // unsigned int64
DT_BOOL = 11; // bool type
DT_DOUBLE = 12; // double type
DT_STRING = 13; // string type
DT_DUAL_SUB_INT8 = 14; /**< dual output int8 type */
DT_DUAL_SUB_UINT8 = 15; /**< dual output uint8 type */
DT_COMPLEX64 = 16; // complex64 type
DT_COMPLEX128 = 17; // complex128 type
DT_QINT8 = 18; // qint8 type
DT_QINT16 = 19; // qint16 type
DT_QINT32 = 20; // qint32 type
DT_QUINT8 = 21; // quint8 type
DT_QUINT16 = 22; // quint16 type
DT_RESOURCE = 23; // resource type
DT_STRING_REF = 24; // string_ref type
DT_DUAL = 25; /**< dual output type */
}

message AttrDef
{
message ListValue
{
enum ListValueType{
VT_LIST_NONE = 0;
VT_LIST_STRING = 1;
VT_LIST_INT = 2;
VT_LIST_FLOAT = 3;
VT_LIST_BOOL = 4;
VT_LIST_BYTES = 5;
VT_LIST_TENSOR_DESC = 6;
VT_LIST_TENSOR = 7;
VT_LIST_GRAPH = 8;
VT_LIST_NAMED_ATTRS = 9;
VT_LIST_DATA_TYPE = 10;
}
repeated bytes s = 2; // "list(string)"
repeated int64 i = 3; // "list(int)"
repeated float f = 4; // "list(float)"
repeated bool b = 5; // "list(bool)"
repeated bytes bt = 7;
repeated TensorDescriptor td = 8;
repeated TensorDef t = 9;
repeated GraphDef g = 10;
repeated NamedAttrs na = 11;
repeated int64 dt = 12; // list ge::DataType

ListValueType val_type = 20;
}

message ListListInt{
message ListInt{
repeated int64 list_i = 1; // list int
}
repeated ListInt list_list_i = 1; // list list int
}

oneof value
{
bytes s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10; // Used to support attr nesting
TensorDescriptor td = 11; // GeTensorDesc type
TensorDef t = 12; // GeTensor type
GraphDef g = 13; // Graph type
ListListInt list_list_int = 14; // List List Int type
int64 dt = 15; // ge::DataType
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs
{
string name = 1;
map<string, AttrDef> attr = 2;
}

// Shape / dimension description, using row-major order
message ShapeDef
{
repeated int64 dim = 1; // Size of each dimension
}

// Multidimensional data description
message TensorDescriptor
{
string name = 1; // Optional parameter, tensor name

DataType dtype = 2; // tensor datatype
ShapeDef shape = 3; // Shape / dimension
string layout = 4; // Tensor format, e.g. "NCHW", "NHWC", "CHW", "ND"

bool has_out_attr = 9;
int64 size = 10;
int64 weight_size = 11;
bool reuse_input = 12;
bool output_tensor = 13;
string device_type = 14;
bool input_tensor = 15;
int64 real_dim_cnt = 16;
int64 reuse_input_index = 17;
int64 data_offset = 18;
int64 cmps_size = 19;
string cmps_tab = 20;
int64 cmps_tab_offset = 21;

map<string, AttrDef> attr = 5; // Set of extra parameter fields
}

// GeTensor definition
message TensorDef
{
TensorDescriptor desc = 1; // Tensor description
bytes data = 2; // Tensor data
}


// Operator description
message OpDef
{
string name = 1; // name
string type = 2; // type

repeated string input = 5; // inputs as original op name + output index, in the form op_name:index

map<string, AttrDef> attr = 10; // Set of operator parameter fields

bool has_out_attr = 20;
int64 id = 21;
int64 stream_id = 22;
repeated string input_name = 23;
repeated string src_name = 24;
repeated int64 src_index = 25;
repeated string dst_name = 26;
repeated int64 dst_index = 27;
repeated int64 input_i = 28;
repeated int64 output_i = 29;
repeated int64 workspace = 30;
repeated int64 workspace_bytes = 31;
repeated bool is_input_const = 32;
repeated TensorDescriptor input_desc = 33;
repeated TensorDescriptor output_desc = 34;
repeated string subgraph_name = 35;
}

// Graph definition
message GraphDef
{
string name = 1; // name

repeated string input = 4; // Graph input
repeated string output = 5; // Graph output

repeated OpDef op = 6; // List of operators

map<string, AttrDef> attr = 11; // Extended field
}

// model definition
message ModelDef
{
string name = 1; // name
uint32 version = 2; // IR proto version
string custom_version = 3; // User model version number, passed in by user

repeated GraphDef graph = 7; // Graph definition; graph[0] is the main graph of the ModelDef

map<string, AttrDef> attr = 11; // Extended field
}
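
To make the nesting concrete, a minimal sketch that assembles a one-op model with these messages, assuming the protoc-generated C++ bindings; the header name and all values are illustrative.

#include "ge_ir.pb.h"  // hypothetical generated header name

ge::proto::ModelDef BuildDemoModel() {
  ge::proto::ModelDef model;
  model.set_name("demo_model");
  model.set_version(1);

  ge::proto::GraphDef *graph = model.add_graph();  // graph[0] is the main graph
  graph->set_name("demo_graph");

  ge::proto::OpDef *op = graph->add_op();
  op->set_name("data_0");
  op->set_type("Data");
  graph->add_input("data_0:0");  // op_name:index convention noted on OpDef.input
  return model;
}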


+ 0 - 152 ge/proto/insert_op.proto

@@ -1,152 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message InsertNewOps {
repeated AippOpParams aipp_op = 1;
repeated MultiShapeOpParams multi_shape_op = 2;
}

message AippOpParams {
enum InputFormat {
UNDEFINED = 0;
YUV420SP_U8 = 1;
XRGB8888_U8 = 2;
RGB888_U8 = 3;
YUV400_U8 = 4;
NC1HWC0DI_FP16 = 5;
NC1HWC0DI_S8 = 6;
ARGB8888_U8 = 7;
YUYV_U8 = 8;
YUV422SP_U8 = 9;
AYUV444_U8 = 10;
RAW10 = 11;
RAW12 = 12;
RAW16 = 13;
RAW24 = 14;
RGB16 = 15;
RGB20 = 16;
RGB24 = 17;
RGB8_IR = 18;
RGB16_IR = 19;
RGB24_IR = 20;
}

enum AippMode {
undefined = 0;
static = 1;
dynamic = 2;
}

// AIPP mode: distinguishes static AIPP from dynamic AIPP
AippMode aipp_mode = 1;

// The related_input_rank parameter is required; it is an integer with valid range >= 0 and <= the number of input Data operators, default 0.
// It identifies which model input receives AIPP processing; for example, if the model has two inputs and the second needs AIPP, set related_input_rank to 1.
uint32 related_input_rank = 2;

// The input_edge_idx parameter is optional; it is an integer with valid range >= 0.
// It allows different outputs of the Data operator to receive different AIPP processing; if unset, AIPP is applied by default to all output edges of the model input specified by related_input_rank.
// The configured value must be <= the number of output edges of the Data operator.
repeated uint32 input_edge_idx = 3;

// [Begin] Dynamic AIPP parameters; ignored when static AIPP is configured
uint32 max_src_image_size = 4;

// Whether rotation is supported. Disabled by default; enabling it incurs extra memory and performance cost
bool support_rotation = 5;

// [End] Dynamic AIPP parameters


// [Begin] Static AIPP parameters; ignored when dynamic AIPP is configured
InputFormat input_format = 51;
bool csc_switch = 52;
float cpadding_value = 53;
bool rbuv_swap_switch = 54;
bool ax_swap_switch = 55;
bool single_line_mode = 56;

int32 src_image_size_w = 57;
int32 src_image_size_h = 58;

bool crop = 59;
int32 load_start_pos_w = 60;
int32 load_start_pos_h = 61;
int32 crop_size_w = 62;
int32 crop_size_h = 63;

bool resize = 64;
int32 resize_output_w = 65;
int32 resize_output_h = 66;

bool padding = 67;
int32 left_padding_size = 68;
int32 right_padding_size = 69;
int32 top_padding_size = 70;
int32 bottom_padding_size = 71;

int32 mean_chn_0 = 10;
int32 mean_chn_1 = 11;
int32 mean_chn_2 = 12;
int32 mean_chn_3 = 19;
float min_chn_0 = 13;
float min_chn_1 = 14;
float min_chn_2 = 15;
float min_chn_3 = 20;
repeated float var_reci_chn_0 = 16;
repeated float var_reci_chn_1 = 17;
repeated float var_reci_chn_2 = 18;
repeated float var_reci_chn_3 = 21;

repeated int32 matrix_r0c0 = 30;
repeated int32 matrix_r0c1 = 31;
repeated int32 matrix_r0c2 = 32;
repeated int32 matrix_r1c0 = 33;
repeated int32 matrix_r1c1 = 34;
repeated int32 matrix_r1c2 = 35;
repeated int32 matrix_r2c0 = 36;
repeated int32 matrix_r2c1 = 37;
repeated int32 matrix_r2c2 = 38;
repeated int32 output_bias_0 = 39;
repeated int32 output_bias_1 = 40;
repeated int32 output_bias_2 = 41;
repeated int32 input_bias_0 = 42;
repeated int32 input_bias_1 = 43;
repeated int32 input_bias_2 = 44;

// [End] Static AIPP parameters

// The n number that is used for raw/rgbir data into f16 transformation.
// The transformation equation is x/(2^n). If set to 0, no transform is performed.
uint32 raw_rgbir_to_f16_n = 45;
}

message MultiShapeOpParams {
enum MultiShapeMode {
batch = 0; // dynamic batch
resolution = 1; // dynamic resolution, reserved for extension
}

MultiShapeMode mode = 1; // operator mode
uint32 related_input_rank = 2; // which model input the new operator is inserted at


repeated uint32 batch_list = 11; // batch_list values; the number of batch_list entries is between 2 and 8
}
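
A small sketch of filling in this config for a dynamic AIPP on the first model input, assuming the protoc-generated C++ bindings; the header name and all values are illustrative.

#include "insert_op.pb.h"  // hypothetical generated header name

domi::InsertNewOps MakeDynamicAippConfig() {
  domi::InsertNewOps ops;
  domi::AippOpParams *aipp = ops.add_aipp_op();
  aipp->set_aipp_mode(domi::AippOpParams::dynamic);
  aipp->set_related_input_rank(0);                     // apply AIPP to the first input
  aipp->set_max_src_image_size(1920 * 1080 * 3 / 2);   // illustrative YUV420SP bound
  aipp->set_support_rotation(false);                   // rotation costs extra memory/perf
  return ops;
}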

+ 0 - 401 ge/proto/om.proto

@@ -1,401 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

enum TargetType
{
MINI = 0;
TINY = 1;
LITE = 2;
}

// offline model
message ModelDef {
string name = 1;
uint32 version = 2;

uint64 memory_size = 10;
uint32 stream_num = 11;
uint32 event_num = 12;
uint64 weight_size = 13;
uint32 label_num = 15;
repeated OpDef op = 20;
TargetType target_type = 23;

map<string, AttrDef> attr = 30;
};

// operator define
message OpDef {
string name = 1;
string type = 2;

uint32 id = 3;
uint32 stream_id = 4;

repeated string input_name = 5;

repeated string src_name = 8;
repeated int32 src_index = 9;
repeated int64 input = 10;
repeated int64 output = 11;
repeated TensorDescriptor input_desc = 12;
repeated TensorDescriptor output_desc = 13;
repeated WeightDef weights = 14;
repeated string dst_name = 15;
repeated int32 dst_index = 16;

repeated int64 workspace = 20;
repeated uint32 workspace_bytes = 21;

repeated string weight_name = 22;
repeated bool is_input_const = 23;

map<string, AttrDef> attr = 30;

QuantizeFactorParams quantize_factor = 31;

oneof op_params {
// start at 100 here
SendOpParams sender_param = 100;
RecvOpParams receiver_param = 200;
ConvolutionOpParams convolution_param = 300;
PoolingOpParams pooling_param = 400;
EltwiseOpParams eltwise_param = 500;
BatchNormOpParams batchnorm_param = 600;
ScaleOpParams scale_param = 700;
FullConnectionOpParams full_connection_param = 800;
SoftmaxOpParams softmax_param = 900;
ActivationOpParams activation_param = 1000;
ReshapeOpParams reshape_param = 1100;
}
};

message SendOpParams {
uint32 event_id = 1;
};

message RecvOpParams {
uint32 event_id = 1;
};

enum QuantizeScaleType
{
VECTOR_SCALE = 0;
SCALAR_SCALE = 1;
}

enum QuantizeScaleMode
{
NORMAL_MODE = 0;
SQRT_MODE = 1;
}

enum QuantizeAlgorithm
{
NON_OFFSET_ALGO = 0;
HALF_OFFSET_ALGO = 1;
ALL_OFFSET_ALGO = 2;
}
message QuantizeFactor
{
QuantizeScaleMode scale_mode = 1;
bytes scale_value = 2;
int64 scale_offset = 3;
bytes offset_data_value = 4;
int64 offset_data_offset = 5;
bytes offset_weight_value = 6;
int64 offset_weight_offset = 7;
bytes offset_pad_value = 8;
int64 offset_pad_offset = 9;
};

message QuantizeCalcFactor
{
bytes offsetw = 1;
int64 offsetw_offset = 2;
bytes offsetd = 3;
int64 offsetd_offset = 4;
bytes scalereq = 5;
int64 scaledreq_offset = 6;
bytes offsetdnext = 7;
int64 offsetdnext_offset = 8;
}

message QuantizeFactorParams
{
QuantizeAlgorithm quantize_algo = 1;
QuantizeScaleType scale_type = 2;
QuantizeFactor quantize_param = 3;
QuantizeFactor dequantize_param = 4;
QuantizeFactor requantize_param = 5;
QuantizeCalcFactor quantizecalc_param = 6;
};

message ConvolutionOpParams {
int32 mode = 1;
int32 algo = 2;
int32 pad_mode = 3;
uint32 group = 4;
uint32 num_output = 5;

repeated uint32 pad = 10;
repeated uint32 stride = 11;
repeated uint32 dilation = 12;
repeated uint32 kernel = 13;

float alpha = 20;
float beta = 21;

WeightDef filter = 40;
WeightDef bias = 41;

bool relu_flag = 62;
repeated uint32 adj = 70;
repeated uint32 target_shape = 71;
repeated uint32 before_pad = 72;
};

message PoolingOpParams {
int32 mode = 1;
int32 nan_opt = 2;
int32 pad_mode = 3;
bool global_pooling = 4;

repeated uint32 window = 10;
repeated uint32 pad = 11;
repeated uint32 stride = 12;
bool ceil_mode = 13;
int32 data_mode = 14;

float alpha = 20;
float beta = 21;
repeated uint32 before_pad = 22;
};

message EltwiseOpParams {
int32 mode = 1;
repeated float coeff = 2;
float alpha = 3;
float beta = 4;
repeated WeightDef weight = 5;
bool relu_flag = 6;
};

message ActivationOpParams {
int32 mode = 1;
float coef = 2;
float alpha = 3;
float beta = 4;
};

message BatchNormOpParams {
int32 mode = 1;

float alpha = 2;
float beta = 3;
double epsilon = 4; // optional, [default = 1e-5]
bool use_global_stats = 5; // optional, default true (testing mode)
float moving_average_fraction = 6; // optional, [default = .999]

WeightDef estimated_mean = 7;
WeightDef estimated_variance = 8;

WeightDef scale = 9;
WeightDef bias = 10;
};

message ScaleOpParams {
WeightDef scale = 1;
WeightDef bias = 2;
};

message ReshapeOpParams {
float alpha = 1;
float beta = 2;
ShapeDef shape = 3;
int32 axis = 4;
int32 num_axes = 5;
int32 format = 6;
};

message SoftmaxOpParams {
int32 algo = 1;
int32 mode = 2;
float alpha = 3;
float beta = 4;
};

message FullConnectionOpParams {
WeightDef filter = 1;
WeightDef bias = 2;
uint32 num_output = 3;
bool relu_flag = 12;
};

message FlattenOpParams {
float alpha = 1;
float beta = 2;
int32 start_axis = 3;
int32 end_axis = 4;
}

message AddLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message MulLimitedOpParams {
float alpha = 1;
float beta = 2;
int32 axis = 3;
bool broadcast = 4;

repeated WeightDef weight = 10;
};

message AddOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message MulOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message SubOpParams {
float alpha = 1;
float beta = 2;

repeated WeightDef weight = 10;
};

message BiasAddOpParams {
float alpha = 1;
float beta = 2;

WeightDef bias = 10;
};

message MatMulOpParams {
float alpha = 1;
float beta = 2;
bool transposeX = 3;
bool transposeW = 4;

WeightDef filter = 10;
WeightDef bias = 12;
};

message RsqrtOpParams {
float alpha = 1;
float beta = 2;
};


message WeightDef {
int32 format = 1;
int32 data_type = 2;
ShapeDef shape = 3;
bytes data = 4;
int64 data_offset = 5;
uint32 cmps_size = 6;
bytes cmps_tab = 7;
int64 cmps_tab_offset = 10;
CompressInfo cmps_info = 8;
AllOffsetQuantizeInfo alloffset_quantize_info = 11;
}

message ShapeDef {
repeated int64 dim = 1;
}

enum DeviceType {
NPU = 0; // By default, NPU is used.
CPU = 1; // CPU
}

message AllOffsetQuantizeInfo {
float scale = 1;
int32 offset = 2;
}

message TensorDescriptor {
int32 format = 1;
int32 data_type = 2;
repeated int64 dim = 3;
uint32 size = 4;
bool reuse_input = 5;
bool output_tensor = 7;
DeviceType device_type = 8;
bool input_tensor = 9;
uint32 real_dim_cnt = 10;
uint32 reuse_input_index = 11;
AllOffsetQuantizeInfo alloffset_quantize_info = 12;
}

message CompressInfo {
int32 blockRow = 1; // block row
int32 blockCol = 2; // block col
int32 fractalK = 3; // fractal K
int32 fractalN = 4; // fractal N
int32 lastFractalK = 5; // K of last fractal
int32 lastFractalN = 6; // N of last fractal
int32 cubeSize = 7; // cube's length
int32 loadDir = 8; // data load direction: 0: column load, 1: row load
}

message AttrDef {
message ListValue {
repeated string s = 2; // "list(string)"
repeated int64 i = 3 [packed = true]; // "list(int)"
repeated float f = 4 [packed = true]; // "list(float)"
repeated bool b = 5 [packed = true]; // "list(bool)"
repeated uint32 u = 6 [packed = true]; // "list(uint)"
repeated bytes bt = 7;
}

oneof value {
string s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
uint32 u = 6; // "uint32"
bytes bt = 7;
ListValue list = 1; // any "list(...)"
NamedAttrs func = 10;
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NamedAttrs {
string name = 1;
map<string, AttrDef> attr = 2;
}
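
As a reading aid for the attribute map above, a minimal sketch of populating an OpDef with the protoc-generated C++ API; the generated header name and the example attribute names are assumptions:

// Hypothetical sketch (assumes om.proto has been compiled to om.pb.h).
#include "om.pb.h"

domi::OpDef MakeConv() {
  domi::OpDef op;
  op.set_name("conv1");
  op.set_type("Convolution");

  domi::AttrDef pads;
  for (int64_t p : {1, 1, 1, 1}) {
    pads.mutable_list()->add_i(p);  // any "list(...)" value goes through the list field
  }
  (*op.mutable_attr())["pads"] = pads;

  domi::AttrDef relu;
  relu.set_b(true);                 // scalar attrs set one member of the oneof
  (*op.mutable_attr())["relu_flag"] = relu;
  return op;
}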


+0 -89  ge/proto/op_mapping_info.proto

@@ -1,89 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";
package aicpu.dump;

message Shape {
repeated uint64 dim = 1;
}

message Output {
int32 data_type = 1;
int32 format = 2;
Shape shape = 3;
uint64 address = 4;
string original_name = 5;
int32 original_output_index = 6;
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
}

message Input {
int32 data_type =1;
int32 format = 2;
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
}

enum BufferType {
L1 = 0;
}

message OpBuffer {
BufferType buffer_type = 1;
uint64 address = 2;
uint64 size = 3;
}

message Op {
string op_name = 1;
string op_type = 2;
}

message Task {
uint32 task_id = 1;
uint32 stream_id = 2;
Op op = 3;
repeated Output output = 4;
bool end_graph = 5;
repeated Input input = 6;
repeated OpBuffer buffer = 7;
}

message OpMappingInfo {
string dump_path = 1;
oneof model_name_param {
string model_name = 2;
}
oneof model_id_param {
uint32 model_id = 3;
}
oneof step_id {
uint64 step_id_addr = 4;
}
oneof iterations_per_loop {
uint64 iterations_per_loop_addr = 5;
}
oneof loop_cond {
uint64 loop_cond_addr = 6;
}
uint32 flag = 7; // 0x01 load, 0x00 unload
repeated Task task = 8;
string dump_step = 9;
}

+0 -170  ge/proto/task.proto

@@ -1,170 +0,0 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = "proto3";

package domi;

message ModelTaskDef {
string version = 1;

map<string, string> attr = 9; // Extended field
repeated TaskDef task = 10;

uint64 memory_size = 11;
uint32 stream_num = 12;
uint32 event_num = 13;
uint64 weight_size = 14;

repeated bytes op = 15; // input/output opdef in bytes

uint64 base_addr = 16; // base addr
uint64 weight_addr = 17; // weight addr
uint32 batch_num = 18;
}


message TaskDef {
uint32 id = 1;
uint32 type = 2;

uint32 stream_id = 10;
uint32 event_id = 11;

KernelDef kernel = 20;
KernelExDef kernel_ex = 21;
KernelHcclDef kernel_hccl = 25;
EventExDef event_ex = 26;
LogTimeStampDef log_timestamp = 28;

uint32 label_id = 30;

MemcpyAsyncDef memcpy_async = 31;
StreamSwitchDef stream_switch = 32;
StreamActiveDef stream_active = 33;
bytes private_def = 34;
uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future
StreamSwitchNDef stream_switch_n = 36;

LabelSetDef label_set = 37;
LabelGotoExDef label_goto_ex = 38;
LabelSwitchByIndexDef label_switch_by_index = 39;
}

message KernelDef {
KernelContext context = 1;

string stub_func = 10;
uint32 block_dim = 11;
uint32 args_size = 12;
bytes args = 13;
bytes sm_desc = 14;
bytes flowtable = 15;
string so_name = 16;
string kernel_name = 17;
bytes kernel_ext_info = 18;
uint32 kernel_ext_info_size = 19;
}

message KernelContext {
uint32 kernel_type = 1;
uint32 op_id = 2; // OP type in CCE
uint32 kernel_func_id = 3;
uint32 op_index = 4; // TE/Custom operator
bool is_flowtable = 5; // Identify whether args is a flowtable structure
bytes args_offset = 6; // args offset information
uint32 args_count = 7; // args count
repeated uint32 origin_op_index = 8;
}


message KernelExDef {
uint32 flags = 1;

uint32 op_index = 4;
uint32 args_size = 12;
bytes args = 13;
bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput
uint32 task_info_size = 15;
bytes kernel_ext_info = 16;
uint32 kernel_ext_info_size = 17;
}


message KernelHcclDef {
uint32 op_index = 8;
string hccl_type = 9;
}


message EventExDef {
uint32 op_index = 1;
uint32 event_type = 2;
}

message LogTimeStampDef {
uint64 logid = 1;
bool notify = 2;
uint32 flat = 3;
}

message MemcpyAsyncDef {
uint64 dst = 1;
uint64 dst_max = 2;
uint64 src = 3;
uint64 count = 4;
uint32 kind = 5;
uint32 op_index = 6;
}

message StreamSwitchDef {
uint32 op_index = 1;
uint32 true_stream_id = 2;
int64 value = 3;
uint64 value_ptr = 4;
uint32 data_type = 5;
}

message StreamActiveDef {
uint32 op_index = 1;
uint32 active_stream_id = 2;
}

message StreamSwitchNDef {
uint32 op_index = 1;
uint32 size = 2;
repeated int64 target_value = 3;
repeated uint32 true_stream_id = 4;
uint32 element_size = 5;
uint32 data_type = 6;
}

message LabelSetDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelGotoExDef {
uint32 op_index = 1;
uint32 label_id = 2;
uint32 model_id = 3;
}

message LabelSwitchByIndexDef {
uint32 op_index = 1;
uint32 label_max = 2;
}

+0 -62  ge/proto/tensorflow/attr_value.proto

@@ -1,62 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "AttrValueProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "tensor.proto";
import "tensor_shape.proto";
import "types.proto";

// Protocol buffer representing the value for an attr used to configure an Op.
// Comment indicates the corresponding attr type. Only the field matching the
// attr type may be filled.
message AttrValue {
// LINT.IfChange
message ListValue {
repeated bytes s = 2; // "list(string)"
repeated int64 i = 3 [packed = true]; // "list(int)"
repeated float f = 4 [packed = true]; // "list(float)"
repeated bool b = 5 [packed = true]; // "list(bool)"
repeated DataType type = 6 [packed = true]; // "list(type)"
repeated TensorShapeProto shape = 7; // "list(shape)"
repeated TensorProto tensor = 8; // "list(tensor)"
repeated NameAttrList func = 9; // "list(attr)"
}
// LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc)

oneof value {
bytes s = 2; // "string"
int64 i = 3; // "int"
float f = 4; // "float"
bool b = 5; // "bool"
DataType type = 6; // "type"
TensorShapeProto shape = 7; // "shape"
TensorProto tensor = 8; // "tensor"
ListValue list = 1; // any "list(...)"

// "func" represents a function. func.name is a function's name or
// a primitive op's name. func.attr.first is the name of an attr
// defined for that function. func.attr.second is the value for
// that attr in the instantiation.
NameAttrList func = 10;

// This is a placeholder only used in nodes defined inside a
// function. It indicates the attr value will be supplied when
// the function is instantiated. For example, let us suppose a
// node "N" in function "FN". "N" has an attr "A" with value
// placeholder = "foo". When FN is instantiated with attr "foo"
// set to "bar", the instantiated node N's attr A will have been
// given the value "bar".
string placeholder = 9;
}
}

// A list of attr names and their values. The whole list is attached
// with a string name. E.g., MatMul[T=float].
message NameAttrList {
string name = 1;
map<string, AttrValue> attr = 2;
}

+0 -100  ge/proto/tensorflow/function.proto

@@ -1,100 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "FunctionProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";
import "node_def.proto";
import "op_def.proto";

// A library is a set of named functions.
message FunctionDefLibrary {
repeated FunctionDef function = 1;
repeated GradientDef gradient = 2;
}

// A function can be instantiated when the runtime can bind every attr
// with a value. When a GraphDef has a call to a function, it must
// have binding for every attr defined in the signature.
// * device spec, etc.
message FunctionDef {
// The definition of the function's name, arguments, return values,
// attrs etc.
OpDef signature = 1;

// Attributes specific to this function definition.
map<string, AttrValue> attr = 5;

// NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21.
reserved 2;

// In both of the following fields, there is the need to specify an
// output that is used as either the input to another node (in
// `node_def`) or as a return value of the function (in `ret`).
// Unlike the NodeDefs in GraphDef, we need to be able to specify a
// list in some cases (instead of just single outputs). Also, we
// need to be able to deal with lists of unknown length (so the
// output index may not be known at function definition time). So
// we use the following format instead:
// * "fun_in" where "fun_in" is the name of a function input arg in
// the `signature` field above. This represents that input, whether
// it is a single tensor or a list.
// * "fun_in:0" gives the first element of a function input arg (a
// non-list input is considered a list of length 1 for these
// purposes).
// * "node:out" where "node" is the name of a node in `node_def` and
// "out" is the name one of its op's output arguments (the name
// comes from the OpDef of the node's op). This represents that
// node's output, whether it is a single tensor or a list.
// Note: We enforce that an op's output arguments are never
// renamed in the backwards-compatibility test.
// * "node:out:0" gives the first element of a node output arg (a
// non-list output is considered a list of length 1 for these
// purposes).
//
// NOT CURRENTLY SUPPORTED (but may be in the future):
// * "node:out:-1" gives last element in a node output list
// * "node:out:1:" gives a list with all but the first element in a
// node output list
// * "node:out::-1" gives a list with all but the last element in a
// node output list

// The body of the function. Unlike the NodeDefs in a GraphDef, attrs
// may have values of type `placeholder` and the `input` field uses
// the "output" format above.

// By convention, "op" in node_def is resolved by consulting with a
// user-defined library first. If not resolved, "func" is assumed to
// be a builtin op.
repeated NodeDef node_def = 3;

// A mapping from the output arg names from `signature` to the
// outputs from `node_def` that should be returned by the function.
map<string, string> ret = 4;
}

// GradientDef defines the gradient function of a function defined in
// a function library.
//
// A gradient function g (specified by gradient_func) for a function f
// (specified by function_name) must follow the following:
//
// The function 'f' must be a numerical function which takes N inputs
// and produces M outputs. Its gradient function 'g', which is a
// function taking N + M inputs and produces N outputs.
//
// I.e. if we have
// (y1, y2, ..., y_M) = f(x1, x2, ..., x_N),
// then, g is
// (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N,
// dL/dy1, dL/dy2, ..., dL/dy_M),
// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the
// loss function). dL/dx_i is the partial derivative of L with respect
// to x_i.
message GradientDef {
string function_name = 1; // The function name.
string gradient_func = 2; // The gradient function's name.
}

+0 -56  ge/proto/tensorflow/graph.proto

@@ -1,56 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "GraphProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "node_def.proto";
import "function.proto";
import "versions.proto";

// Represents the graph of operations
message GraphDef {
repeated NodeDef node = 1;

// Compatibility versions of the graph. See core/public/version.h for version
// history. The GraphDef version is distinct from the TensorFlow version, and
// each release of TensorFlow will support a range of GraphDef versions.
VersionDef versions = 4;

// Deprecated single version field; use versions above instead. Since all
// GraphDef changes before "versions" was introduced were forward
// compatible, this field is entirely ignored.
int32 version = 3 [deprecated = true];

// EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET.
//
// "library" provides user-defined functions.
//
// Naming:
// * library.function.name are in a flat namespace.
// NOTE: We may need to change it to be hierarchical to support
// different orgs. E.g.,
// { "/google/nn", { ... }},
// { "/google/vision", { ... }}
// { "/org_foo/module_bar", { ... }}
// map<string, FunctionDefLib> named_lib;
// * If node[i].op is the name of one function in "library",
// node[i] is deemed as a function call. Otherwise, node[i].op
// must be a primitive operation supported by the runtime.
//
//
// Function call semantics:
//
// * The callee may start execution as soon as some of its inputs
// are ready. The caller may want to use Tuple() mechanism to
// ensure all inputs are ready in the same time.
//
// * The consumer of return values may start executing as soon as
// the return values the consumer depends on are ready. The
// consumer may want to use Tuple() mechanism to ensure the
// consumer does not start until all return values of the callee
// function are ready.
FunctionDefLibrary library = 2;
};

+0 -14  ge/proto/tensorflow/graph_library.proto

@@ -1,14 +0,0 @@
syntax = "proto3";

package domi.tensorflow;

import "graph.proto";

message GeGraphDef {
string name = 1;
GraphDef graph = 2;
}

message GraphDefLibrary {
repeated GeGraphDef graph_def = 1;
};

+0 -63  ge/proto/tensorflow/node_def.proto

@@ -1,63 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "NodeProto";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";

message NodeDef {
// The name given to this operator. Used for naming inputs,
// logging, visualization, etc. Unique within a single GraphDef.
// Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*".
string name = 1;

// The operation name. There may be custom parameters in attrs.
// Op names starting with an underscore are reserved for internal use.
string op = 2;

// Each input is "node:src_output" with "node" being a string name and
// "src_output" indicating which output tensor to use from "node". If
// "src_output" is 0 the ":0" suffix can be omitted. Regular inputs
// may optionally be followed by control inputs that have the format
// "^node".
repeated string input = 3;

// A (possibly partial) specification for the device on which this
// node should be placed.
// The expected syntax for this string is as follows:
//
// DEVICE_SPEC ::= PARTIAL_SPEC
//
// PARTIAL_SPEC ::= ("/" CONSTRAINT) *
// CONSTRAINT ::= ("job:" JOB_NAME)
// | ("replica:" [1-9][0-9]*)
// | ("task:" [1-9][0-9]*)
// | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") )
//
// Valid values for this string include:
// * "/job:worker/replica:0/task:1/device:GPU:3" (full specification)
// * "/job:worker/device:GPU:3" (partial specification)
// * "" (no specification)
//
// If the constraints do not resolve to a single device (or if this
// field is empty or not present), the runtime will attempt to
// choose a device automatically.
string device = 4;

// Operation-specific graph-construction-time configuration.
// Note that this should include all attrs defined in the
// corresponding OpDef, including those with a value matching
// the default -- this allows the default to change and makes
// NodeDefs easier to interpret on their own. However, if
// an attr with a default is not specified in this list, the
// default will be used.
// The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and
// one of the names from the corresponding OpDef's attr field).
// The values must have a type matching the corresponding OpDef
// attr's type field.
// Add some examples here showing best practices.
map<string, AttrValue> attr = 5;
};
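
A short sketch illustrating the three input string forms described above (a regular input with the implicit ":0", an explicit output index, and a "^node" control input); the generated header name and the node names are assumptions:

// Hypothetical sketch (assumes node_def.proto has been compiled to node_def.pb.h).
#include "node_def.pb.h"

domi::tensorflow::NodeDef MakeNode() {
  domi::tensorflow::NodeDef node;
  node.set_name("matmul_0");
  node.set_op("MatMul");
  node.add_input("weights");   // output 0 of node "weights"; the ":0" suffix is omitted
  node.add_input("input:1");   // output tensor 1 of node "input"
  node.add_input("^init");     // control input: run only after node "init"
  return node;
}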

+0 -164  ge/proto/tensorflow/op_def.proto

@@ -1,164 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "OpDefProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "attr_value.proto";
import "types.proto";

// Defines an operation. A NodeDef in a GraphDef specifies an Op by
// using the "op" field which should match the name of a OpDef.
// LINT.IfChange
message OpDef {
// Op names starting with an underscore are reserved for internal use.
// Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*".
string name = 1;

// For describing inputs and outputs.
message ArgDef {
// Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*".
string name = 1;

// Human readable description.
string description = 2;

// Describes the type of one or more tensors that are accepted/produced
// by this input/output arg. The only legal combinations are:
// * For a single tensor: either the "type" field is set or the
// "type_attr" field is set to the name of an attr with type "type".
// * For a sequence of tensors with the same type: the "number_attr"
// field will be set to the name of an attr with type "int", and
// either the "type" or "type_attr" field will be set as for
// single tensors.
// * For a sequence of tensors, the "type_list_attr" field will be set
// to the name of an attr with type "list(type)".
DataType type = 3;
string type_attr = 4; // if specified, attr must have type "type"
string number_attr = 5; // if specified, attr must have type "int"
// If specified, attr must have type "list(type)", and none of
// type, type_attr, and number_attr may be specified.
string type_list_attr = 6;

// For inputs: if true, the inputs are required to be refs.
// By default, inputs can be either refs or non-refs.
// For outputs: if true, outputs are refs, otherwise they are not.
bool is_ref = 16;
};

// Description of the input(s).
repeated ArgDef input_arg = 2;

// Description of the output(s).
repeated ArgDef output_arg = 3;

// Description of the graph-construction-time configuration of this
// Op. That is to say, this describes the attr fields that will
// be specified in the NodeDef.
message AttrDef {
// A descriptive name for the argument. May be used, e.g. by the
// Python client, as a keyword argument name, and so should match
// the regexp "[a-z][a-z0-9_]+".
string name = 1;

// One of the type names from attr_value.proto ("string", "list(string)",
// "int", etc.).
string type = 2;

// A reasonable default for this attribute if the user does not supply
// a value. If not specified, the user must supply a value.
AttrValue default_value = 3;

// Human-readable description.
string description = 4;


// --- Constraints ---
// These constraints are only in effect if specified. Default is no
// constraints.

// For type == "int", this is a minimum value. For "list(___)"
// types, this is the minimum length.
bool has_minimum = 5;
int64 minimum = 6;

// The set of allowed values. Has type that is the "list" version
// of the "type" field above (uses the "list" field of AttrValue).
// If type == "type" or "list(type)" above, then the "type" field
// of "allowed_values.list" has the set of allowed DataTypes.
// If type == "string" or "list(string)", then the "s" field of
// "allowed_values.list" has the set of allowed strings.
AttrValue allowed_values = 7;
}
repeated AttrDef attr = 4;

// Optional deprecation based on GraphDef versions.
OpDeprecation deprecation = 8;

// One-line human-readable description of what the Op does.
string summary = 5;

// Additional, longer human-readable description of what the Op does.
string description = 6;

// -------------------------------------------------------------------------
// Which optimizations this operation can participate in.

// True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs)
bool is_commutative = 18;

// If is_aggregate is true, then this operation accepts N >= 2
// inputs and produces 1 output all of the same type. Should be
// associative and commutative, and produce output with the same
// shape as the input. The optimizer may replace an aggregate op
// taking input from multiple devices with a tree of aggregate ops
// that aggregate locally within each device (and possibly within
// groups of nearby devices) before communicating.
bool is_aggregate = 16; // for things like add

// Other optimizations go here, like
// can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc.

// -------------------------------------------------------------------------
// Optimization constraints.

// Ops are marked as stateful if their behavior depends on some state beyond
// their input tensors (e.g. variable reading op) or if they have
// a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops
// must always produce the same output for the same input and have
// no side-effects.
//
// By default Ops may be moved between devices. Stateful ops should
// either not be moved, or should only be moved if that state can also
// be moved (e.g. via some sort of save / restore).
// Stateful ops are guaranteed to never be optimized away by Common
// Subexpression Elimination (CSE).
bool is_stateful = 17; // for things like variables, queue

// -------------------------------------------------------------------------
// Non-standard options.

// By default, all inputs to an Op must be initialized Tensors. Ops
// that may initialize tensors for the first time should set this
// field to true, to allow the Op to take an uninitialized Tensor as
// input.
bool allows_uninitialized_input = 19; // for Assign, etc.
};
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc)

// Information about version-dependent deprecation of an op
message OpDeprecation {
// First GraphDef version at which the op is disallowed.
int32 version = 1;

// Explanation of why it was deprecated and what to use instead.
string explanation = 2;
};

// A collection of OpDefs
message OpList {
repeated OpDef op = 1;
};

+0 -29  ge/proto/tensorflow/resource_handle.proto

@@ -1,29 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "ResourceHandle";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Protocol buffer representing a handle to a tensorflow resource. Handles are
// not valid across executions, but can be serialized back and forth from within
// a single run.
message ResourceHandleProto {
// Unique name for the device containing the resource.
string device = 1;

// Container in which this resource is placed.
string container = 2;

// Unique name of this resource.
string name = 3;

// Hash code for the type of the resource. Is only valid in the same device
// and in the same execution.
uint64 hash_code = 4;

// For debug-only, the name of the type pointed to by this handle, if
// available.
string maybe_type_name = 5;
};

+0 -94  ge/proto/tensorflow/tensor.proto

@@ -1,94 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TensorProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

import "resource_handle.proto";
import "tensor_shape.proto";
import "types.proto";

// Protocol buffer representing a tensor.
message TensorProto {
DataType dtype = 1;

// Shape of the tensor.
TensorShapeProto tensor_shape = 2;

// Only one of the representations below is set, one of "tensor_contents" and
// the "xxx_val" attributes. We are not using oneof because as oneofs cannot
// contain repeated fields it would require another extra set of messages.

// Version number.
//
// In version 0, if the "repeated xxx" representations contain only one
// element, that element is repeated to fill the shape. This makes it easy
// to represent a constant Tensor with a single value.
int32 version_number = 3;

// Serialized raw tensor content from either Tensor::AsProtoTensorContent or
// memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation
// can be used for all tensor types. The purpose of this representation is to
// reduce serialization overhead during RPC call by avoiding serialization of
// many repeated small items.
bytes tensor_content = 4;

// Type specific representations that make it easy to create tensor protos in
// all languages. Only the representation corresponding to "dtype" can
// be set. The values hold the flattened representation of the tensor in
// row major order.

// DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll
// have some pointless zero padding for each value here.
repeated int32 half_val = 13 [packed = true];

// DT_FLOAT.
repeated float float_val = 5 [packed = true];

// DT_DOUBLE.
repeated double double_val = 6 [packed = true];

// DT_INT32, DT_INT16, DT_INT8, DT_UINT8.
repeated int32 int_val = 7 [packed = true];

// DT_STRING
repeated bytes string_val = 8;

// DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real
// and imaginary parts of i-th single precision complex.
repeated float scomplex_val = 9 [packed = true];

// DT_INT64
repeated int64 int64_val = 10 [packed = true];

// DT_BOOL
repeated bool bool_val = 11 [packed = true];

// DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real
// and imaginary parts of i-th double precision complex.
repeated double dcomplex_val = 12 [packed = true];

// DT_RESOURCE
repeated ResourceHandleProto resource_handle_val = 14;

// DT_VARIANT
repeated VariantTensorDataProto variant_val = 15;

// DT_UINT32
repeated uint32 uint32_val = 16 [packed = true];

// DT_UINT64
repeated uint64 uint64_val = 17 [packed = true];
};

// Protocol buffer representing the serialization format of DT_VARIANT tensors.
message VariantTensorDataProto {
// Name of the type of objects being serialized.
string type_name = 1;
// Portions of the object that are not Tensors.
bytes metadata = 2;
// Tensors contained within objects being serialized.
repeated TensorProto tensors = 3;
}

+0 -45  ge/proto/tensorflow/tensor_shape.proto

@@ -1,45 +0,0 @@
// Protocol buffer representing the shape of tensors.

syntax = "proto3";
option cc_enable_arenas = true;
option java_outer_classname = "TensorShapeProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

package domi.tensorflow;

// Dimensions of a tensor.
message TensorShapeProto {
// One dimension of the tensor.
message Dim {
// Size of the tensor in that dimension.
// This value must be >= -1, but values of -1 are reserved for "unknown"
// shapes (values of -1 mean "unknown" dimension). Certain wrappers
// that work with TensorShapeProto may fail at runtime when deserializing
// a TensorShapeProto containing a dim value of -1.
int64 size = 1;

// Optional name of the tensor dimension.
string name = 2;
};

// Dimensions of the tensor, such as {"input", 30}, {"output", 40}
// for a 30 x 40 2D tensor. If an entry has size -1, this
// corresponds to a dimension of unknown size. The names are
// optional.
//
// The order of entries in "dim" matters: It indicates the layout of the
// values in the tensor in-memory representation.
//
// The first entry in "dim" is the outermost dimension used to layout the
// values, the last entry is the innermost dimension. This matches the
// in-memory layout of RowMajor Eigen tensors.
//
// If "dim.size()" > 0, "unknown_rank" must be false.
repeated Dim dim = 2;

// If true, the number of dimensions in the shape is unknown.
//
// If true, "dim.size()" must be 0.
bool unknown_rank = 3;
};
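
To make the dim ordering and unknown_rank rules concrete, a small sketch; the generated header name and the dim names are assumptions:

// Hypothetical sketch: a two-dimensional shape whose inner dimension is unknown.
#include "tensor_shape.pb.h"  // assumed generated header

domi::tensorflow::TensorShapeProto MakeShape() {
  domi::tensorflow::TensorShapeProto shape;
  auto *outer = shape.add_dim();
  outer->set_name("input");
  outer->set_size(30);   // first entry: outermost dimension
  auto *inner = shape.add_dim();
  inner->set_name("output");
  inner->set_size(-1);   // -1 marks an unknown dimension
  // unknown_rank must stay false because dim is non-empty
  return shape;
}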

+0 -74  ge/proto/tensorflow/types.proto

@@ -1,74 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "TypesProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// LINT.IfChange
enum DataType {
// Not a legal value for DataType. Used to indicate a DataType field
// has not been set.
DT_INVALID = 0;

// Data types that all computation devices are expected to be
// capable to support.
DT_FLOAT = 1;
DT_DOUBLE = 2;
DT_INT32 = 3;
DT_UINT8 = 4;
DT_INT16 = 5;
DT_INT8 = 6;
DT_STRING = 7;
DT_COMPLEX64 = 8; // Single-precision complex
DT_INT64 = 9;
DT_BOOL = 10;
DT_QINT8 = 11; // Quantized int8
DT_QUINT8 = 12; // Quantized uint8
DT_QINT32 = 13; // Quantized int32
DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops.
DT_QINT16 = 15; // Quantized int16
DT_QUINT16 = 16; // Quantized uint16
DT_UINT16 = 17;
DT_COMPLEX128 = 18; // Double-precision complex
DT_HALF = 19;
DT_RESOURCE = 20;
DT_VARIANT = 21; // Arbitrary C++ data types
DT_UINT32 = 22;
DT_UINT64 = 23;

// Do not use! These are only for parameters. Every enum above
// should have a corresponding value below (verified by types_test).
DT_FLOAT_REF = 101;
DT_DOUBLE_REF = 102;
DT_INT32_REF = 103;
DT_UINT8_REF = 104;
DT_INT16_REF = 105;
DT_INT8_REF = 106;
DT_STRING_REF = 107;
DT_COMPLEX64_REF = 108;
DT_INT64_REF = 109;
DT_BOOL_REF = 110;
DT_QINT8_REF = 111;
DT_QUINT8_REF = 112;
DT_QINT32_REF = 113;
DT_BFLOAT16_REF = 114;
DT_QINT16_REF = 115;
DT_QUINT16_REF = 116;
DT_UINT16_REF = 117;
DT_COMPLEX128_REF = 118;
DT_HALF_REF = 119;
DT_RESOURCE_REF = 120;
DT_VARIANT_REF = 121;
DT_UINT32_REF = 122;
DT_UINT64_REF = 123;
}
// LINT.ThenChange(
// https://www.tensorflow.org/code/tensorflow/c/c_api.h,
// https://www.tensorflow.org/code/tensorflow/go/tensor.go,
// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.h,
// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc,
// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py,
// https://www.tensorflow.org/code/tensorflow/python/framework/function.py)

+0 -31  ge/proto/tensorflow/versions.proto

@@ -1,31 +0,0 @@
syntax = "proto3";

package domi.tensorflow;
option cc_enable_arenas = true;
option java_outer_classname = "VersionsProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.framework";

// Version information for a piece of serialized data
//
// There are different types of versions for each type of data
// (GraphDef, etc.), but they all have the same common shape
// described here.
//
// Each consumer has "consumer" and "min_producer" versions (specified
// elsewhere). A consumer is allowed to consume this data if
//
// producer >= min_producer
// consumer >= min_consumer
// consumer not in bad_consumers
//
message VersionDef {
// The version of the code that produced this data.
int32 producer = 1;

// Any consumer below this version is not allowed to consume this data.
int32 min_consumer = 2;

// Specific consumer versions which are disallowed (e.g. due to bugs).
repeated int32 bad_consumers = 3;
};
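
The consumption rule in the comment above can be checked mechanically; a minimal sketch, assuming the proto has been compiled to versions.pb.h and that the consumer supplies its own version constants:

// Hypothetical sketch of the producer/consumer compatibility check.
#include <algorithm>
#include <cstdint>
#include "versions.pb.h"  // assumed generated header

bool IsConsumable(const domi::tensorflow::VersionDef &v,
                  int32_t consumer, int32_t min_producer) {
  if (v.producer() < min_producer) return false;   // producer >= min_producer
  if (consumer < v.min_consumer()) return false;   // consumer >= min_consumer
  return std::none_of(v.bad_consumers().begin(), v.bad_consumers().end(),
                      [consumer](int32_t bad) { return bad == consumer; });
}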

+0 -3  ge/session/readme.txt

@@ -1,3 +0,0 @@
GE
SessionManager
InnerSession

+141 -0  inc/common/blocking_queue.h

@@ -0,0 +1,141 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_BLOCKING_QUEUE_H_
#define INC_COMMON_BLOCKING_QUEUE_H_

#include <stdint.h>
#include <condition_variable>
#include <list>
#include <mutex>

static const int kDefaultMaxQueueSize = 2048;

template <typename T>
class BlockingQueue {
public:
explicit BlockingQueue(uint32_t max_size = kDefaultMaxQueueSize) : max_size_(max_size), is_stoped_(false) {}

~BlockingQueue() {}

bool Pop(T &item) {
std::unique_lock<std::mutex> lock(mutex_);

while (queue_.empty() && !is_stoped_) {
empty_cond_.wait(lock);
}

if (is_stoped_) {
return false;
}

item = std::move(queue_.front());
queue_.pop_front();

full_cond_.notify_one();

return true;
}

bool Push(const T &item, bool is_wait = true) {
std::unique_lock<std::mutex> lock(mutex_);

while (queue_.size() >= max_size_ && !is_stoped_) {
if (!is_wait) {
return false;
}
full_cond_.wait(lock);
}

if (is_stoped_) {
return false;
}

queue_.push_back(item);

empty_cond_.notify_one();

return true;
}

bool Push(T &&item, bool is_wait = true) {
std::unique_lock<std::mutex> lock(mutex_);

while (queue_.size() >= max_size_ && !is_stoped_) {
if (!is_wait) {
return false;
}
full_cond_.wait(lock);
}

if (is_stoped_) {
return false;
}

queue_.emplace_back(std::move(item));

empty_cond_.notify_one();

return true;
}

void Stop() {
{
std::unique_lock<std::mutex> lock(mutex_);
is_stoped_ = true;
}

full_cond_.notify_all();
empty_cond_.notify_all();
}

void Restart() {
std::unique_lock<std::mutex> lock(mutex_);
is_stoped_ = false;
}

// if the queue is stopped, call this function to release the unprocessed items
std::list<T> GetRemainItems() {
std::unique_lock<std::mutex> lock(mutex_);

if (!is_stoped_) {
return std::list<T>();
}

return queue_;
}

bool IsFull() {
std::unique_lock<std::mutex> lock(mutex_);
return queue_.size() >= max_size_;
}

void Clear() {
std::unique_lock<std::mutex> lock(mutex_);
queue_.clear();
}

private:
std::list<T> queue_;
std::mutex mutex_;
std::condition_variable empty_cond_;
std::condition_variable full_cond_;
uint32_t max_size_;

bool is_stoped_;
};

#endif // INC_COMMON_BLOCKING_QUEUE_H_
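
For reference, a minimal producer/consumer sketch of this queue; the include path, the bound of 8, and the item count are assumptions. Note that once Stop() is called, Pop() returns false even if items are still queued, which is what GetRemainItems() is for:

// Hypothetical usage sketch for BlockingQueue (not part of this header).
#include <iostream>
#include <thread>
#include "inc/common/blocking_queue.h"  // assumed include path

int main() {
  BlockingQueue<int> queue(8);  // bounded: Push blocks once 8 items are queued

  std::thread producer([&queue] {
    for (int i = 0; i < 100; ++i) {
      queue.Push(i);  // blocks while the queue is full
    }
    queue.Stop();     // wakes waiters; subsequent Pop calls return false
  });

  std::thread consumer([&queue] {
    int item = 0;
    while (queue.Pop(item)) {  // false after Stop(), even if items remain
      std::cout << item << '\n';
    }
    // items still queued at Stop() time can be recovered with GetRemainItems()
  });

  producer.join();
  consumer.join();
  return 0;
}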

+104 -0  inc/common/dynamic_aipp.h

@@ -0,0 +1,104 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_DYNAMIC_AIPP_H_
#define INC_COMMON_DYNAMIC_AIPP_H_

#include <stdint.h>

/**
* @ingroup dnn
* @brief struct definition of dynamic AIPP batch parameters.
*/
typedef struct tagAippDynamicBatchPara {
int8_t cropSwitch; // crop switch
int8_t scfSwitch; // resize switch
int8_t paddingSwitch; // 0: disable padding
// 1: padding with configured values, sfr_filling_hblank_ch0 ~ sfr_filling_hblank_ch2
// 2: padding with source picture data, single row/column copy
// 3: padding with source picture data, block copy
// 4: padding with source picture data, mirror copy
int8_t rotateSwitch; // rotate switch, 0: no rotation,
// 1: rotate 90° clockwise, 2: rotate 180° clockwise, 3: rotate 270° clockwise
int8_t reserve[4];
int32_t cropStartPosW; // the start horizontal position of cropping
int32_t cropStartPosH; // the start vertical position of cropping
int32_t cropSizeW; // crop width
int32_t cropSizeH; // crop height

int32_t scfInputSizeW; // input width of scf
int32_t scfInputSizeH; // input height of scf
int32_t scfOutputSizeW; // output width of scf
int32_t scfOutputSizeH; // output height of scf

int32_t paddingSizeTop; // top padding size
int32_t paddingSizeBottom; // bottom padding size
int32_t paddingSizeLeft; // left padding size
int32_t paddingSizeRight; // right padding size

int16_t dtcPixelMeanChn0; // mean value of channel 0
int16_t dtcPixelMeanChn1; // mean value of channel 1
int16_t dtcPixelMeanChn2; // mean value of channel 2
int16_t dtcPixelMeanChn3; // mean value of channel 3

uint16_t dtcPixelMinChn0; // min value of channel 0
uint16_t dtcPixelMinChn1; // min value of channel 1
uint16_t dtcPixelMinChn2; // min value of channel 2
uint16_t dtcPixelMinChn3; // min value of channel 3
uint16_t dtcPixelVarReciChn0; // sfr_dtc_pixel_variance_reci_ch0
uint16_t dtcPixelVarReciChn1; // sfr_dtc_pixel_variance_reci_ch1
uint16_t dtcPixelVarReciChn2; // sfr_dtc_pixel_variance_reci_ch2
uint16_t dtcPixelVarReciChn3; // sfr_dtc_pixel_variance_reci_ch3

int8_t reserve1[16]; // 32B assign, for ub copy
} kAippDynamicBatchPara;

/**
* @ingroup dnn
* @brief struct definition of dynamic AIPP parameters. lite: 64 + 96*batchNum bytes; tiny: 64 + 64*batchNum bytes
*/
typedef struct tagAippDynamicPara {
uint8_t inputFormat; // input format:YUV420SP_U8/XRGB8888_U8/RGB888_U8
int8_t cscSwitch; // csc switch
int8_t rbuvSwapSwitch; // rb/ub swap switch
int8_t axSwapSwitch; // RGBA->ARGB, YUVA->AYUV swap switch
int8_t batchNum; // batch parameter number
int8_t reserve1[3];
int32_t srcImageSizeW; // source image width
int32_t srcImageSizeH; // source image height
int16_t cscMatrixR0C0; // csc_matrix_r0_c0
int16_t cscMatrixR0C1; // csc_matrix_r0_c1
int16_t cscMatrixR0C2; // csc_matrix_r0_c2
int16_t cscMatrixR1C0; // csc_matrix_r1_c0
int16_t cscMatrixR1C1; // csc_matrix_r1_c1
int16_t cscMatrixR1C2; // csc_matrix_r1_c2
int16_t cscMatrixR2C0; // csc_matrix_r2_c0
int16_t cscMatrixR2C1; // csc_matrix_r2_c1
int16_t cscMatrixR2C2; // csc_matrix_r2_c2
int16_t reserve2[3];
uint8_t cscOutputBiasR0; // output Bias for RGB to YUV, element of row 0, unsigned number
uint8_t cscOutputBiasR1; // output Bias for RGB to YUV, element of row 1, unsigned number
uint8_t cscOutputBiasR2; // output Bias for RGB to YUV, element of row 2, unsigned number
uint8_t cscInputBiasR0; // input Bias for YUV to RGB, element of row 0, unsigned number
uint8_t cscInputBiasR1; // input Bias for YUV to RGB, element of row 1, unsigned number
uint8_t cscInputBiasR2; // input Bias for YUV to RGB, element of row 2, unsigned number
uint8_t reserve3[2];
int8_t reserve4[16]; // 32B assign, for ub copy

kAippDynamicBatchPara aippBatchPara; // allows carrying several batch parameter sets
} kAippDynamicPara;

#endif // INC_COMMON_DYNAMIC_AIPP_H_
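
A minimal sketch of filling these structs for a crop-only configuration; the include path, the image and crop sizes, and the numeric inputFormat value are assumptions (the format encoding is not defined in this header):

// Hypothetical sketch: single-batch dynamic AIPP with only crop enabled.
#include <cstring>
#include "inc/common/dynamic_aipp.h"  // assumed include path

kAippDynamicPara MakeCropOnlyAipp() {
  kAippDynamicPara para;
  std::memset(&para, 0, sizeof(para));  // all switches off by default
  para.inputFormat = 1;                 // assumed code for YUV420SP_U8
  para.batchNum = 1;                    // one tagAippDynamicBatchPara follows
  para.srcImageSizeW = 1920;
  para.srcImageSizeH = 1080;
  para.aippBatchPara.cropSwitch = 1;    // enable crop; scf/padding/rotate stay off
  para.aippBatchPara.cropStartPosW = 448;
  para.aippBatchPara.cropStartPosH = 28;
  para.aippBatchPara.cropSizeW = 1024;
  para.aippBatchPara.cropSizeH = 1024;
  return para;
}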

+94 -0  inc/common/npu_error_define.h

@@ -0,0 +1,94 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_NPU_ERROR_DEFINE_H_
#define INC_COMMON_NPU_ERROR_DEFINE_H_

typedef enum tagHiAiNpuLocal {
HIAI_HOST = 1,
HIAI_DEVICE = 2,
} HiAiNpuLocal;

typedef enum tagHiAiNpuCodeType {
ERROR_CODE = 1,
EXCEPTION_CODE = 2,
} HiAiNpuCodeType;

typedef enum tagHiAiNpuErrLevel {
NONE_LEVEL = 0,
SUGGESTION_LEVEL = 1,
NORMAL_LEVEL = 2,
SERIOUS_LEVEL = 3,
CRITICAL_ERROR = 4,
} HiAiNpuErrLevel;

typedef enum tagHiAiNpuModuleId {
HIAI_DRIVER = 1,
HIAI_CTRLCPU = 2,
HIAI_TS = 3,
HIAI_RUNTIME = 4,
HIAI_AICPU = 5,
HIAI_CCE = 6,
HIAI_TVM = 7,
HIAI_FRAMEWORK = 8,
HiAI_ENGINE = 9,
HIAI_DVPP = 10,
HIAI_AIPP = 11,
HIAI_LOWPOWER = 12,
HIAI_MDC = 13,
HIAI_COMPILE = 14,
HIAI_TOOLCHIAN = 15,
HIAI_ALG = 16,
HIAI_PROFILING = 17,
HIAI_HCCL = 18,
HIAI_SIMULATION = 19,
HIAI_BIOS = 20,
HIAI_SEC = 21,
HIAI_TINY = 22,
HIAI_DP = 23,
} HiAiNpuModuleId;

/* bit 31-bit30 to be hiai local */
#define HIAI_NPULOCAL_MASK 0xC0000000
#define SHIFT_LOCAL_MASK 30
#define HIAI_NPULOCAL_VAL_MASK 0x3
/* bit 29 -bit28 to be hiai aicpu code type */
#define HIAI_CODE_TYPE_MASK 0x30000000
#define SHIFT_CODE_MASK 28
#define HIAI_CODE_TYPE_VAL_MASK 0x3
/* bit 27 -bit25 to be hiai error level */
#define HIAI_ERROR_LEVEL_MASK 0x0E000000
#define SHIFT_ERROR_LVL_MASK 25
#define HIAI_ERROR_LEVEL_VAL_MASK 0x7
/* bit 24 -bit17 to be hiai mod */
#define HIAI_MODE_ID_MASK 0x01FE0000
#define SHIFT_MODE_MASK 17
#define HIAI_MODE_ID_VAL_MASK 0xFF

#define HIAI_NPU_LOC_BIT(a) \
(HIAI_NPULOCAL_MASK & ((unsigned int)((HiAiNpuLocal)(a)) & HIAI_NPULOCAL_VAL_MASK) << SHIFT_LOCAL_MASK)
#define HIAI_NPU_CODE_TYPE_BIT(a) \
(HIAI_CODE_TYPE_MASK & ((unsigned int)((HiAiNpuCodeType)(a)) & HIAI_CODE_TYPE_VAL_MASK) << SHIFT_CODE_MASK)
#define HIAI_NPU_ERR_LEV_BIT(a) \
(HIAI_ERROR_LEVEL_MASK & ((unsigned int)((HiAiNpuErrLevel)(a)) & HIAI_ERROR_LEVEL_VAL_MASK) << SHIFT_ERROR_LVL_MASK)
#define HIAI_NPU_MOD_ID_BIT(a) \
(HIAI_MODE_ID_MASK & ((unsigned int)((HiAiNpuModuleId)(a)) & HIAI_MODE_ID_VAL_MASK) << SHIFT_MODE_MASK)

#define HIAI_NPU_ERR_CODE_HEAD(npuLocal, codeType, errLevel, moduleId) \
(HIAI_NPU_LOC_BIT(npuLocal) + HIAI_NPU_CODE_TYPE_BIT(codeType) + HIAI_NPU_ERR_LEV_BIT(errLevel) + \
HIAI_NPU_MOD_ID_BIT(moduleId))

#endif // INC_COMMON_NPU_ERROR_DEFINE_H_
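
A small sketch of what these masks and shifts produce when composed; the include path is an assumption, and the expected value follows directly from the bit layout documented above:

// Hypothetical sketch: composing the high-order bits of an NPU error code.
// HIAI_HOST(1) -> bits 31-30, ERROR_CODE(1) -> bits 29-28,
// NORMAL_LEVEL(2) -> bits 27-25, HIAI_RUNTIME(4) -> bits 24-17.
#include <cstdio>
#include "inc/common/npu_error_define.h"  // assumed include path

int main() {
  unsigned int head = HIAI_NPU_ERR_CODE_HEAD(HIAI_HOST, ERROR_CODE, NORMAL_LEVEL, HIAI_RUNTIME);
  // 0x40000000 | 0x10000000 | 0x04000000 | 0x00080000 == 0x54080000
  std::printf("0x%08X\n", head);
  return 0;
}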

+74 -0  inc/common/opskernel/ge_task_info.h

@@ -0,0 +1,74 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_
#define INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_

#include <runtime/rt.h>
#include <stdint.h>
#include <string>
#include <vector>

using std::string;
namespace ge {
// GETaskKernelHcclInfo is to be eliminated; once it is, DAVINCI_TRAIN/DAVINCI_CLOUD are no longer needed
struct GETaskKernelHcclInfo {
string input_name;
string hccl_type;
void *inputDataAddr;
void *outputDataAddr;
void *workSpaceAddr;
int32_t count;
int32_t dataType;
int32_t opType;
int64_t rootId;
uint64_t workSpaceMemSize;
std::vector<int64_t> dims;
std::vector<rtStream_t> hcclStreamList;
};

struct GETaskInfo {
uint32_t id;
uint16_t type;
uint32_t streamID;
void *stream; // rtKernelLaunch input argument
void *event;
void *privateDef;
uint32_t privateDefLen;
void *opsKernelStorePtr;

std::vector<GETaskKernelHcclInfo> kernelHcclInfo;
};

struct HcomOpertion {
std::string hcclType;
void *inputPtr;
void *outputPtr;
uint64_t count;
int32_t dataType;
int32_t opType;
int32_t root;
};

struct HcomRemoteAccessAddrInfo {
uint32_t remotetRankID;
uint64_t remoteAddr; // host embedding table address
uint64_t localAddr; // device HBM address
uint64_t length; // memory Length in Bytes
};

} // namespace ge
#endif // INC_COMMON_OPSKERNEL_GE_TASK_INFO_H_

+88 -0  inc/common/opskernel/ops_kernel_info_store.h

@@ -0,0 +1,88 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_
#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_

#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "./ge_task_info.h"
#include "./ops_kernel_info_types.h"
#include "cce/aicpu_engine_struct.h"
#include "cce/fwk_adpt_struct.h"
#include "common/ge_inner_error_codes.h"
#include "graph/node.h"
#include "proto/task.pb.h"
using std::map;
using std::string;
using std::to_string;
using std::vector;

namespace ge {
class OpDesc;

class OpsKernelInfoStore {
public:
OpsKernelInfoStore() {}

virtual ~OpsKernelInfoStore() {}

// initialize opsKernelInfoStore
virtual Status Initialize(const map<string, string> &options) = 0; /*lint -e148*/

// close opsKernelInfoStore
virtual Status Finalize() = 0; /*lint -e148*/

virtual Status CreateSession(const std::map<std::string, std::string> &session_options) { return SUCCESS; }

virtual Status DestroySession(const std::map<std::string, std::string> &session_options) { return SUCCESS; }

// get all opsKernelInfo
virtual void GetAllOpsKernelInfo(map<string, OpInfo> &infos) const = 0;

// whether the opsKernelInfoStore is supported based on the operator attribute
virtual bool CheckSupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason) const = 0;

virtual bool CheckAccuracySupported(const OpDescPtr &opDescPtr, std::string &un_supported_reason,
bool realQuery = false) const {
return CheckSupported(opDescPtr, un_supported_reason);
}
// opsFlag[0] indicates whether constant folding is supported
virtual void opsFlagCheck(const ge::Node &node, std::string &opsFlag){};

// memory allocation requirement
virtual Status CalcOpRunningParam(Node &node) = 0; /*lint -e148*/

// generate task for op.
virtual Status GenerateTask(const Node &node, RunContext &context,
std::vector<domi::TaskDef> &tasks) = 0; /*lint -e148*/

// only call fe engine interface to compile single op
virtual Status CompileOp(vector<ge::NodePtr> &node_vec) { return SUCCESS; }
virtual Status CompileOpRun(vector<ge::NodePtr> &node_vec) { return SUCCESS; }
// load task for op
virtual Status LoadTask(GETaskInfo &task) { return SUCCESS; }

// only call aicpu interface to generate task struct
virtual Status GenSingleOpRunTask(const NodePtr &node, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; }

// only call aicpu interface to generate task struct
virtual Status GenMemCopyTask(uint64_t count, STR_FWK_OP_KERNEL &task, string &task_info) { return SUCCESS; }
};
} // namespace ge
#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_STORE_H_
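
A minimal sketch of the pure-virtual surface a concrete store must implement; the class name and its trivial behavior are placeholders, not part of GE:

// Hypothetical sketch of a conforming OpsKernelInfoStore subclass.
#include "inc/common/opskernel/ops_kernel_info_store.h"  // assumed include path

namespace ge {
class DummyKernelInfoStore : public OpsKernelInfoStore {
 public:
  Status Initialize(const map<string, string> &options) override { return SUCCESS; }
  Status Finalize() override { return SUCCESS; }
  void GetAllOpsKernelInfo(map<string, OpInfo> &infos) const override {}
  bool CheckSupported(const OpDescPtr &op_desc, std::string &reason) const override {
    reason = "dummy store supports nothing";
    return false;
  }
  Status CalcOpRunningParam(Node &node) override { return SUCCESS; }
  Status GenerateTask(const Node &node, RunContext &context,
                      std::vector<domi::TaskDef> &tasks) override { return SUCCESS; }
};
}  // namespace ge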

+66 -0  inc/common/opskernel/ops_kernel_info_types.h

@@ -0,0 +1,66 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_
#define INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_

#include <stdint.h>
#include <string>
#include <vector>
#include "graph/buffer.h"
#include "runtime/rt_model.h"

using std::string;

namespace ge {
/*lint -e148*/
struct RunContext {
rtModel_t model;
rtStream_t stream;
uint64_t sessionId;
uint64_t dataMemSize;
uint8_t *dataMemBase;
uint64_t weightMemSize;
uint8_t *weightMemBase;
ge::Buffer weightsBuffer;
std::vector<rtStream_t> graphStreamList; // all streams of graph, order by ge stream id(0,1,...)
std::vector<rtEvent_t> graphEventList; // all events of graph, order by ge event id(0,1,...)
std::vector<rtLabel_t> graphLabelList; // all labels of graph, order by ge label id(0,1,...)
};

/*lint +e148*/

struct Task {
uint32_t id;
uint16_t type;
void *stream;
void *event;
};

struct OpInfo {
string engine; // which engine
/*lint -e148*/
string opKernelLib; // which opsKernelStore
int computeCost; // compute cost
bool flagPartial; // whether support depends on the shape
bool flagAsync; // whether asynchronous execution is supported
bool isAtomic; // whether atomic address clean is supported
string opFileName; // op file name
string opFuncName; // op function name
};
} // namespace ge

#endif // INC_COMMON_OPSKERNEL_OPS_KERNEL_INFO_TYPES_H_

+71 -0  inc/common/optimizer/graph_optimizer.h

@@ -0,0 +1,71 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_
#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_

#include <map>
#include <string>
#include "./graph_optimizer_types.h"
#include "common/ge_inner_error_codes.h"
#include "common/opskernel/ops_kernel_info_types.h"
#include "graph/compute_graph.h"

using std::map;
using std::string;

/*lint -e148*/
namespace ge {
class GraphOptimizer {
public:
virtual ~GraphOptimizer() {}

// initialize graphOptimizer
virtual Status Initialize(const map<string, string> &options) = 0;

// close graphOptimizer
virtual Status Finalize() = 0;

// optimize original graph for FE quant optimize
virtual Status OptimizeGraphPrepare(ComputeGraph &graph) { return SUCCESS; }

// optimize graph before build for RTS
virtual Status OptimizeGraphBeforeBuild(ComputeGraph &graph) { return SUCCESS; }

// optimize original graph, using in graph preparation stage
virtual Status OptimizeOriginalGraph(ComputeGraph &graph) = 0;

// optimize original graph, using for conversion operator insert in graph preparation stage
virtual Status OptimizeOriginalGraphJudgeInsert(ComputeGraph &graph) { return SUCCESS; }

// optimize fused graph
virtual Status OptimizeFusedGraph(ComputeGraph &graph) = 0;

// optimize whole graph, using after graph merged stage
virtual Status OptimizeWholeGraph(ComputeGraph &graph) = 0;

// get attribute of graph optimizer
virtual Status GetAttributes(GraphOptimizerAttribute &attrs) const = 0;

// optimize streamed Graph
virtual Status OptimizeStreamGraph(ComputeGraph &graph, const RunContext &context) { return SUCCESS; }

// op compile
virtual Status OptimizeFusedGraphAfterGraphSlice(ComputeGraph &graph) { return SUCCESS; }
};
} // namespace ge
/*lint +e148*/
#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_H_
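
Likewise, a minimal sketch of the smallest conforming optimizer; the class and engine names are placeholders, not part of GE:

// Hypothetical sketch of a GraphOptimizer subclass that overrides only
// the pure-virtual methods.
#include "inc/common/optimizer/graph_optimizer.h"  // assumed include path

namespace ge {
class NoOpOptimizer : public GraphOptimizer {
 public:
  Status Initialize(const map<string, string> &options) override { return SUCCESS; }
  Status Finalize() override { return SUCCESS; }
  Status OptimizeOriginalGraph(ComputeGraph &graph) override { return SUCCESS; }
  Status OptimizeFusedGraph(ComputeGraph &graph) override { return SUCCESS; }
  Status OptimizeWholeGraph(ComputeGraph &graph) override { return SUCCESS; }
  Status GetAttributes(GraphOptimizerAttribute &attrs) const override {
    attrs.engineName = "NoOpEngine";  // placeholder engine name
    attrs.scope = ENGINE;
    return SUCCESS;
  }
};
}  // namespace ge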

+34 -0  inc/common/optimizer/graph_optimizer_types.h

@@ -0,0 +1,34 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_
#define INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_

#include <stdint.h>
#include <string>
namespace ge {
enum OPTIMIZER_SCOPE {
UNIT = 0,
ENGINE,
};

struct GraphOptimizerAttribute {
std::string engineName;
OPTIMIZER_SCOPE scope;
};
} // namespace ge

#endif // INC_COMMON_OPTIMIZER_GRAPH_OPTIMIZER_TYPES_H_

+41 -0  inc/common/util/ai_core/common/aicore_util_attr_define.h

@@ -0,0 +1,41 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_
#define INC_COMMON_UTILS_AI_CORE_COMMON_ATTR_DEFINE_H_

#include <string>

namespace fe {
static const std::string SCOPE_ID_ATTR = "fusion_scope";

static const std::string FE_IMPLY_TYPE = "_fe_imply_type";

static const std::string PARENT_OP_TYPE = "parentOpType";

static const std::string ATTR_NAME_TASK_L2_FUSION_INFO_EXTEND_PTR = "task_l2_fusion_info_extend_content";

static const std::string ATTR_DATA_DUMP_REF = "_datadump_ref";

static const std::string ATTR_NAME_L2_FUSION_EXTEND_PTR = "l2_fusion_extend_content";

static const std::string L1_OPTIMIZED = "l1_optimized";

static const std::string L2_OPTIMIZED = "l2_optimized";

static const std::string OP_SLICE_INFO = "_op_slice_info";
} // namespace fe
#endif
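
A hedged usage sketch for these keys: reading the fusion scope id off an op with ge::AttrUtils (assuming an AttrUtils::GetInt overload from graph/utils/attr_utils.h and the include path below):

#include "common/aicore_util_attr_define.h"  // include path is an assumption
#include "graph/utils/attr_utils.h"

namespace fe {
// Returns false when the op carries no "fusion_scope" attribute.
bool GetFusionScopeId(const ge::OpDescPtr &op_desc, int64_t &scope_id) {
  return ge::AttrUtils::GetInt(op_desc, SCOPE_ID_ATTR, scope_id);
}
}  // namespace fe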

+118 -0 inc/common/util/ai_core/common/aicore_util_types.h

@@ -0,0 +1,118 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_
#define INC_COMMON_UTILS_AI_CORE_COMMON_TYPES_H_

#include "graph/anchor.h"
#include "graph/types.h"
#include "runtime/kernel.h"
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

namespace fe {
struct FusionOpSrc {
uint32_t src_op_id;
ge::AnchorPtr src_anchor;
int32_t fusion_src_index;
int32_t fusion_dst_index;
};

struct FusionOpDst {
uint32_t dst_op_id;
ge::AnchorPtr dst_anchor;
};

struct FusionDataFlow {
std::pair<ge::AnchorPtr, ge::AnchorPtr> edge;
std::pair<std::string, ge::AnchorPtr> node_dataindex_pair;
};

typedef struct tagL2FusionData {
uint32_t l2Index;
uint64_t l2Addr;
uint64_t l2PageNum;
} L2FusionData_t;
typedef std::map<uint64_t, L2FusionData_t> L2FusionDataMap_t;

typedef struct tagFeSmDesc {
rtL2Ctrl_t l2ctrl;
std::string nodeName[8];
uint8_t outputIndex[8];
} feSmDesc_t;

typedef struct TagTaskL2FusionInfo {
std::string nodeName;
feSmDesc_t l2Info;
L2FusionDataMap_t input;
L2FusionDataMap_t output;
uint32_t isUsed;
} TaskL2FusionInfo_t;

using L2FusionInfoPtr = std::shared_ptr<TaskL2FusionInfo_t>;

typedef struct ToOpStruct {
int64_t opL1Space = 0;
std::vector<int64_t> opL1FusionType;
int64_t opL1WorkspaceFlag = 0; // for workspace flag
int64_t opL1WorkspaceSize = 0;
std::vector<std::vector<int64_t>> validInputShape;
std::vector<std::vector<int64_t>> validOutputShape;
std::vector<std::vector<int64_t>> sliceInputOffset; // conv & pooling & ReadSelect
std::vector<std::vector<int64_t>> sliceOutputOffset; // WriteSelect
std::vector<uint32_t> totalShape;
uint32_t splitIndex = 0;
ToOpStruct() {
// set invalid values for the essential fields
opL1Space = -1;
opL1WorkspaceSize = -1;
}
} ToOpStruct_t;

enum OpImplType {
EN_IMPL_CUSTOM_CONSTANT_CCE = 0, // custom constant op
EN_IMPL_CUSTOM_TIK, // custom tik op
EN_IMPL_CUSTOM_TBE, // custom tbe op
EN_IMPL_HW_CONSTANT_CCE, // Huawei built-in constant op
EN_IMPL_HW_GENERAL_CCE, // Huawei built-in cce op
EN_IMPL_HW_TIK, // Huawei built-in tik op
EN_IMPL_HW_TBE, // Huawei built-in tbe op
EN_IMPL_RL, // RL op
EN_IMPL_PLUGIN_TBE, // Huawei built-in tbe plugin op
EN_IMPL_VECTOR_CORE_HW_TBE, // Huawei built-in tbe op
EN_IMPL_VECTOR_CORE_CUSTOM_TBE, // custom tbe op
EN_IMPL_NON_PERSISTENT_CUSTOM_TBE, // custom tbe op
EN_RESERVED // reserved value
};

static const std::map<ge::DataType, uint32_t> DATATYPE_SIZE_MAP{{ge::DT_FLOAT, sizeof(float)},
{ge::DT_FLOAT16, sizeof(int16_t)},
{ge::DT_INT8, sizeof(int8_t)},
{ge::DT_INT32, sizeof(int32_t)},
{ge::DT_UINT8, sizeof(uint8_t)},
{ge::DT_UINT32, sizeof(uint32_t)},
{ge::DT_INT16, sizeof(int16_t)},
{ge::DT_UINT16, sizeof(uint16_t)},
{ge::DT_INT64, sizeof(int64_t)},
{ge::DT_UINT64, sizeof(uint64_t)},
{ge::DT_DOUBLE, sizeof(double)},
{ge::DT_BOOL, sizeof(bool)},
{ge::DT_DUAL, sizeof(float) + sizeof(int8_t)},
{ge::DT_DUAL_SUB_UINT8, sizeof(int8_t)},
{ge::DT_DUAL_SUB_INT8, sizeof(int8_t)}};
} // namespace fe
#endif
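
DATATYPE_SIZE_MAP gives the per-element byte width, so a tensor's raw byte size is its element count times the mapped value. A small illustrative helper (not part of the header):

#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>
#include "common/aicore_util_types.h"  // include path is an assumption

// Returns -1 for data types the map does not cover.
int64_t TensorByteSize(const std::vector<int64_t> &shape, ge::DataType dtype) {
  auto iter = fe::DATATYPE_SIZE_MAP.find(dtype);
  if (iter == fe::DATATYPE_SIZE_MAP.end()) {
    return -1;
  }
  const int64_t elem_cnt = std::accumulate(shape.begin(), shape.end(), static_cast<int64_t>(1),
                                           std::multiplies<int64_t>());
  return elem_cnt * static_cast<int64_t>(iter->second);
}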

+107 -0 inc/common/util/ai_core/common/graph_comm.h

@@ -0,0 +1,107 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_
#define INC_COMMON_UTILS_AI_CORE_COMMON_GRAPH_COMMON_H_

#include "graph/compute_graph.h"
#include "common/aicore_util_types.h"
#include "register/graph_optimizer/graph_optimize_register_error_codes.h"

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace fe {

using kScopeNodeMap_t = std::map<int64_t, std::vector<ge::NodePtr>>;
using kScopeNodePair_t = std::pair<int64_t, std::vector<ge::NodePtr>>;

class GraphCommImpl;
using GraphCommImplPtr = std::unique_ptr<GraphCommImpl>;

class GraphComm {
public:
GraphComm(const string &engineName);
virtual ~GraphComm();
GraphComm(const GraphComm &in) = delete;
GraphComm &operator=(const GraphComm &in) = delete;

Status GetscopeNodeMap(ge::ComputeGraph &graph, kScopeNodeMap_t &fusionMap);

Status CopyFusionOpNodes(vector<FusionDataFlow> &fusInputEdgeList, vector<FusionDataFlow> &fusOutputEdgeList,
vector<ge::NodePtr> &fusNodelist, ge::OpDescPtr fusionOpDesc,
ge::ComputeGraphPtr fusionGraph);

Status CopyFusionOpEdges(ge::OpDescPtr fusionOpDesc, ge::ComputeGraph &origGraph, ge::ComputeGraphPtr fusionGraph);

Status GetNodeDataFlowMap(const ge::NodePtr &fusNode,
std::map<ge::NodePtr, std::map<ge::AnchorPtr, ge::AnchorPtr>> &fusionOpAnchorsMap,
ge::kFusionDataFlowVec_t &fusDataflowList, const int &mapType);

Status GetFusionNodeEdgeList(std::vector<ge::NodePtr> &fusNodelist, std::vector<FusionDataFlow> &fusInputEdgeList,
std::vector<FusionDataFlow> &fusOutputEdgeList);
void ClearFusionSrc();

void ClearFusionDst();

void AddFusionOutputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_src_index,
std::pair<string, ge::AnchorPtr> &node_dataindex_pair);

void AddFusionInputSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, const int32_t &fusion_dst_index,
std::pair<string, ge::AnchorPtr> &node_dataindex_pair);

void SaveFusionDst(const uint32_t &dst_op_id, ge::AnchorPtr dst_anchor);

bool IsFusionDstExist(const uint32_t &dst_op_id, const ge::AnchorPtr &dst_anchor);

bool GetFusionSrc(const uint32_t &src_op_id, const ge::AnchorPtr &src_anchor, int32_t &fusion_src_index,
int32_t &fusion_dst_index);

Status GetFusionNodeCtrlEdgeList(vector<ge::NodePtr> &fusNodelist, vector<FusionDataFlow> &fusInputCtrlEdgeList,
vector<FusionDataFlow> &fusOutputCtrlEdgeList);

Status MergeFusionNodeEdgeList(ge::NodePtr &fusNode, vector<ge::NodePtr> &fusNodelist,
vector<FusionDataFlow> &fusInputEdgeList, vector<FusionDataFlow> &fusOutputEdgeList);

Status MergeFusionNodeCtrlEdgeList(ge::NodePtr &fusNode, vector<ge::NodePtr> &fusNodelist,
vector<FusionDataFlow> &fusInputEdgeList,
vector<FusionDataFlow> &fusOutputEdgeList);

string GetEngineName();

private:
Status MergeFusionNodeInputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist,
std::vector<FusionDataFlow> &fusInputEdgeList);
Status MergeFusionNodeOutputEdgeList(ge::NodePtr fusNode, std::vector<ge::NodePtr> &fusNodelist,
std::vector<FusionDataFlow> &fusOutputEdgeList);

string engineName_;

std::vector<FusionOpSrc> exist_fusion_src_list_;
std::vector<FusionOpDst> exist_fusion_dst_list_;

// std::vector<std::multimap<std::string, uint32_t>>
ge::kFusionDataFlowVec_t fusion_input_dataflow_list_;

// std::vector<std::multimap<std::string, ge::AnchorPtr>>
ge::kFusionDataFlowVec_t fusion_output_dataflow_list_;

GraphCommImplPtr graphCommImplPtr_;
};
} // namespace fe
#endif
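
A minimal calling sequence for the scope query, sketched under two assumptions: the engine name string is illustrative, and fe::SUCCESS is defined in graph_optimize_register_error_codes.h:

void CollectFusionScopes(ge::ComputeGraph &graph) {
  fe::GraphComm graph_comm("AIcoreEngine");  // engine name is illustrative
  fe::kScopeNodeMap_t fusion_map;
  if (graph_comm.GetscopeNodeMap(graph, fusion_map) != fe::SUCCESS) {
    return;  // query failed
  }
  // fusion_map now maps each fusion scope id to the nodes tagged into that scope.
}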

+43 -0 inc/common/util/ai_core/common/scope_allocator.h

@@ -0,0 +1,43 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_
#define INC_COMMON_UTILS_AI_CORE_COMMON_SCOPE_ALLOCATOR_H_

#include "graph/op_desc.h"

namespace fe {
class ScopeAllocator {
public:
ScopeAllocator();
virtual ~ScopeAllocator();
ScopeAllocator(const ScopeAllocator& in) = delete;
ScopeAllocator& operator=(const ScopeAllocator& in) = delete;

public:
void Init();
int64_t GetCurrentScopeId();
int64_t AllocateScopeId();
bool HasScopeAttr(ge::ConstOpDescPtr opdef);
bool GetScopeAttr(ge::ConstOpDescPtr opdef, int64_t& scopeId);
bool SetScopeAttr(ge::OpDescPtr opdef, int64_t scopeId);
bool ResetScopeId(int64_t scopeId);

private:
int64_t scopeId;
};
} // namespace fe
#endif
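
Typical flow: allocate one scope id per fusion group and stamp it onto every member op. The function and container names are hypothetical; SetScopeAttr is assumed to write the "fusion_scope" attribute declared in aicore_util_attr_define.h, and allocator.Init() is assumed to have been called already:

void TagFusionGroup(const std::vector<ge::OpDescPtr> &ops_to_fuse, fe::ScopeAllocator &allocator) {
  const int64_t scope_id = allocator.AllocateScopeId();  // fresh id for this group
  for (const ge::OpDescPtr &op_desc : ops_to_fuse) {
    (void)allocator.SetScopeAttr(op_desc, scope_id);     // tag each member with the shared id
  }
}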

+33 -0 inc/common/util/ai_core/param_calculate/aicore_param_calculator.h

@@ -0,0 +1,33 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef AICORE_PARAM_CALCULATOR
#define AICORE_PARAM_CALCULATOR

#include "graph/node.h"
#include "graph_optimizer/graph_optimize_register_error_codes.h"

namespace fe {
class AICoreParamCalculator {
public:
AICoreParamCalculator();

~AICoreParamCalculator();

Status CalcOpRunningParam(ge::Node &node);
};
} // namespace fe
#endif // AICORE_PARAM_CALCULATOR
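
Usage is a single call per node; a sketch (assuming fe::Status and fe::SUCCESS come from the error-codes header above):

fe::Status CalcRunningParam(ge::Node &node) {
  fe::AICoreParamCalculator calculator;
  return calculator.CalcOpRunningParam(node);  // fills the op's running parameters in place
}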

+45 -0 inc/common/util/ai_core/param_calculate/tensorsize_calculator.h

@@ -0,0 +1,45 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef TENSORSIZE_CALCULATOR_H
#define TENSORSIZE_CALCULATOR_H

#include "graph_optimizer/graph_optimize_register_error_codes.h"

#include <map>
#include <string>
#include "graph/compute_graph.h"
#include "graph/op_desc.h"

namespace fe {
class TensorSizeCalculator {
public:
/**
* Calculate the tensor size of input and output of each opdesc
* @param opDesc opdesc object
* @param opImplType op impl type
* @return status SUCCESS or FAILED
*/
static Status CalculateOpTensorSize(ge::OpDesc &opDesc);

private:
static Status CalcInputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag);

static Status CalcOutputOpTensorSize(ge::OpDesc &opDesc, int32_t &outputRealCalcFlag);
};
} // namespace fe

#endif // TENSORSIZE_CALCULATOR_H
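
A thin wrapper shows the intended call shape (ge::NodePtr and GetOpDesc() come from graph/node.h; fe::Status is assumed from the error-codes header):

fe::Status AnnotateTensorSizes(const ge::NodePtr &node) {
  // Computes and stores the byte size of every input/output tensor desc on the op.
  return fe::TensorSizeCalculator::CalculateOpTensorSize(*node->GetOpDesc());
}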

+37 -0 inc/common/util/compress/compress.h

@@ -0,0 +1,37 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef COMPRESS_H
#define COMPRESS_H

#include <cstddef>  // size_t
#include <uchar.h>

enum CmpStatus { RET_SUCCESS = 0, RET_ERROR = -1 };

struct CompressConfig {
size_t inputSize;   // length of the data to compress
size_t engineNum;   // number of decompression engines
size_t maxRatio;    // size of a basic compression block; only 64 is supported now (8x: 64, 4x: 32)
size_t channel;     // number of L2/DDR channels, used for load balancing
size_t fractalSize; // size of each compression block
bool isTight;       // whether the compressed data is packed tightly
size_t init_offset;
};

CmpStatus CompressWeights(char* input, const CompressConfig& compressConfig, char* indexs, char* output,
size_t& compressedLength);

#endif // COMPRESS_H
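
A hedged calling sketch; every buffer size below is an illustrative assumption (a real caller sizes the output and index buffers according to the compression engine's spec):

#include <cstddef>
#include <vector>

CmpStatus CompressBlob(char *input, size_t input_size) {
  CompressConfig config{};
  config.inputSize = input_size;
  config.engineNum = 1;      // one decompression engine (assumption)
  config.maxRatio = 64;      // the only value supported per the comment above
  config.channel = 1;        // single L2/DDR channel (assumption)
  config.fractalSize = 512;  // illustrative block size
  config.isTight = true;
  config.init_offset = 0;

  std::vector<char> indexes(1024);       // index buffer size is engine-specific (assumption)
  std::vector<char> output(input_size);  // worst case: no compression gain (assumption)
  size_t compressed_len = 0;
  return CompressWeights(input, config, indexes.data(), output.data(), compressed_len);
}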

+33 -0 inc/common/util/compress/compress_weight.h

@@ -0,0 +1,33 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef COMPRESS_WEIGHT_H
#define COMPRESS_WEIGHT_H

#include "compress.h"

const int SHAPE_SIZE_WEIGHT = 4;

struct CompressOpConfig {
int64_t wShape[SHAPE_SIZE_WEIGHT];
size_t compressTilingK;
size_t compressTilingN;
struct CompressConfig compressConfig;
};

extern "C" CmpStatus CompressWeightsConv2D(const char *const input, char *const zipBuffer, char *const infoBuffer,
CompressOpConfig *const param);
#endif // COMPRESS_WEIGHT_H

+94 -0 inc/common/util/error_manager/error_manager.h

@@ -0,0 +1,94 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef ERROR_MANAGER_H_
#define ERROR_MANAGER_H_

#include <map>
#include <string>
#include <vector>

class ErrorManager {
public:
///
/// @brief Obtain ErrorManager instance
/// @return ErrorManager instance
///
static ErrorManager &GetInstance();

///
/// @brief initialize the error manager
/// @param [in] path: path of the current shared library (.so)
/// @return int 0(success) -1(fail)
///
int Init(std::string path);

///
/// @brief Report error message
/// @param [in] error_code: error code
/// @param [in] args_map: parameter map
/// @return int 0(success) -1(fail)
///
int ReportErrMessage(std::string error_code, const std::map<std::string, std::string> &args_map);

///
/// @brief output error message
/// @param [in] handle: print handle
/// @return int 0(success) -1(fail)
///
int OutputErrMessage(int handle);

///
/// @brief output message
/// @param [in] handle: print handle
/// @return int 0(success) -1(fail)
///
int OutputMessage(int handle);

///
/// @brief Report error message (ATC)
/// @param [in] error_code: error code
/// @param [in] key: parameter keys
/// @param [in] value: parameter values
///
void ATCReportErrMessage(std::string error_code, const std::vector<std::string> &key = {},
const std::vector<std::string> &value = {});

private:
struct ErrorInfo {
std::string error_id;
std::string error_message;
std::vector<std::string> arg_list;
};

ErrorManager() {}
~ErrorManager() {}

ErrorManager(const ErrorManager &) = delete;
ErrorManager(ErrorManager &&) = delete;
ErrorManager &operator=(const ErrorManager &) = delete;
ErrorManager &operator=(ErrorManager &&) = delete;

int ParseJsonFile(std::string path);

int ReadJsonFile(const std::string &file_path, void *handle);

bool is_init_ = false;
std::map<std::string, ErrorInfo> error_map_;
std::vector<std::string> error_messages_;
std::vector<std::string> warning_messages_;
};

#endif // ERROR_MANAGER_H_
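
Usage sketch: initialize once with the path to the error-message configuration, then report with a code and its arguments. The error code and argument keys below are illustrative:

#include <map>
#include <string>

void ReportUnsupportedDtype(const std::string &op_name, const std::string &dtype) {
  std::map<std::string, std::string> args = {{"opname", op_name}, {"dtype", dtype}};
  // "E10001" is a placeholder; real codes come from the json parsed by Init().
  (void)ErrorManager::GetInstance().ReportErrMessage("E10001", args);
}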

+101 -0 inc/common/util/platform_info.h

@@ -0,0 +1,101 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PLATFORM_INFO_H
#define PLATFORM_INFO_H

#include <map>
#include <string>
#include <vector>
#include "platform_info_def.h"

using std::map;
using std::string;
using std::vector;

namespace fe {
class PlatformInfoManager {
public:
PlatformInfoManager(const PlatformInfoManager &) = delete;
PlatformInfoManager &operator=(const PlatformInfoManager &) = delete;

static PlatformInfoManager &Instance();
uint32_t InitializePlatformInfo();
uint32_t Finalize();

uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo);

uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platformInfo, OptionalInfo &optiCompilationInfo);

void SetOptionalCompilationInfo(OptionalInfo &optiCompilationInfo);

private:
PlatformInfoManager();
~PlatformInfoManager();

uint32_t LoadIniFile(string iniFileRealPath);

void Trim(string &str);

uint32_t LoadConfigFile(string realPath);

string RealPath(const std::string &path);

string GetSoFilePath();

void ParseVersion(map<string, string> &versionMap, string &socVersion, PlatformInfo &platformInfoTemp);

void ParseSocInfo(map<string, string> &socInfoMap, PlatformInfo &platformInfoTemp);

void ParseCubeOfAICoreSpec(map<string, string> &aiCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseBufferOfAICoreSpec(map<string, string> &aiCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseUBOfAICoreSpec(map<string, string> &aiCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseUnzipOfAICoreSpec(map<string, string> &aiCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseAICoreSpec(map<string, string> &aiCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseBufferOfAICoreMemoryRates(map<string, string> &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp);

void ParseAICoreMemoryRates(map<string, string> &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp);

void ParseUBOfAICoreMemoryRates(map<string, string> &aiCoreMemoryRatesMap, PlatformInfo &platformInfoTemp);

void ParseAICoreintrinsicDtypeMap(map<string, string> &aiCoreintrinsicDtypeMap, PlatformInfo &platformInfoTemp);

void ParseVectorCoreSpec(map<string, string> &vectorCoreSpecMap, PlatformInfo &platformInfoTemp);

void ParseVectorCoreMemoryRates(map<string, string> &vectorCoreMemoryRatesMap, PlatformInfo &platformInfoTemp);

void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platformInfoTemp);

void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vectorCoreintrinsicDtypeMap,
PlatformInfo &platformInfoTemp);

uint32_t ParsePlatformInfoFromStrToStruct(map<string, map<string, string>> &contentInfoMap, string &socVersion,
PlatformInfo &platformInfoTemp);

uint32_t AssemblePlatformInfoVector(map<string, map<string, string>> &contentInfoMap);

private:
bool initFlag_;
map<string, PlatformInfo> platformInfoMap_;
OptionalInfo optiCompilationInfo_;
};
} // namespace fe
#endif
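
A usage sketch, assuming the uint32_t returns use 0 for success; the SoC version string is illustrative:

uint32_t QueryAiCoreCount(uint32_t &core_cnt) {
  fe::PlatformInfo platform_info;
  fe::OptionalInfo opti_info;
  auto &mgr = fe::PlatformInfoManager::Instance();
  if (mgr.InitializePlatformInfo() != 0) {
    return 1;  // ini/config load failed
  }
  if (mgr.GetPlatformInfo("Ascend310", platform_info, opti_info) != 0) {  // SoC version is illustrative
    return 1;
  }
  core_cnt = platform_info.socInfo.aiCoreCnt;  // e.g. feeds tiling decisions
  return 0;
}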

+140 -0 inc/common/util/platform_info_def.h

@@ -0,0 +1,140 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef PLATFORM_INFO_DEF_H
#define PLATFORM_INFO_DEF_H

#include <cstdint>
#include <map>
#include <string>
#include <vector>

using std::map;
using std::string;
using std::vector;

namespace fe {
enum MemoryType { DDR = 0, HBM };

enum L2Type { Cache = 0, Buff };

typedef struct tagStrInfo {
string aicVersion;
string ccecAICVersion;
string ccecAIVVersion;
string isSupportAIcpuCompiler;
} StrInfo;

typedef struct tagSoCInfo {
uint32_t aiCoreCnt;
uint32_t vectorCoreCnt;
uint32_t aiCpuCnt;
MemoryType memoryType;
uint64_t memorySize;
L2Type l2Type;
uint64_t l2Size;
uint32_t l2PageNum;
} SoCInfo;

typedef struct tagAiCoreSpec {
double cubeFreq;
uint64_t cubeMSize;
uint64_t cubeNSize;
uint64_t cubeKSize;
uint64_t vecCalcSize;
uint64_t l0ASize;
uint64_t l0BSize;
uint64_t l0CSize;
uint64_t l1Size;
uint64_t smaskBuffer;
uint64_t ubSize;
uint64_t ubblockSize;
uint64_t ubbankSize;
uint64_t ubbankNum;
uint64_t ubburstInOneBlock;
uint64_t ubbankGroupNum;
uint32_t unzipEngines;
uint32_t unzipMaxRatios;
uint32_t unzipChannels;
uint8_t unzipIsTight;
} AiCoreSpec;

typedef struct tagAiCoreMemoryRates {
double ddrRate;
double ddrReadRate;
double ddrWriteRate;
double l2Rate;
double l2ReadRate;
double l2WriteRate;
double l1ToL0ARate;
double l1ToL0BRate;
double l1ToUBRate;
double l0CToUBRate;
double ubToL2Rate;
double ubToDdrRate;
double ubToL1Rate;
} AiCoreMemoryRates;

typedef struct tagVectorCoreSpec {
double vecFreq;
uint64_t vecCalcSize;
uint64_t smaskBuffer;
uint64_t ubSize;
uint64_t ubblockSize;
uint64_t ubbankSize;
uint64_t ubbankNum;
uint64_t ubburstInOneBlock;
uint64_t ubbankGroupNum;
uint64_t vectorRegSize;
uint64_t predicateRegSize;
uint64_t addressRegSize;
} VectorCoreSpec;

typedef struct tagVectorCoreMemoryRates {
double ddrRate;
double ddrReadRate;
double ddrWriteRate;
double l2Rate;
double l2ReadRate;
double l2WriteRate;
double ubToL2Rate;
double ubToDdrRate;
} VectorCoreMemoryRates;

typedef struct tagCPUCache {
uint32_t AICPUSyncBySW;
uint32_t TSCPUSyncBySW;
} CPUCache;

typedef struct tagPlatformInfo {
StrInfo strInfo;
SoCInfo socInfo;
AiCoreSpec aiCoreSpec;
AiCoreMemoryRates aiCoreMemoryRates;
map<string, vector<string>> aiCoreIntrinsicDtypeMap;
VectorCoreSpec vectorCoreSpec;
VectorCoreMemoryRates vectorCoreMemoryRates;
CPUCache cpucache;
map<string, vector<string>> vectorCoreIntrinsicDtypeMap;
} PlatformInfo;

typedef struct tagOptionalInfo {
string socVersion;
string coreType;
uint32_t aiCoreNum;
string l1FusionFlag;
} OptionalInfo;
} // namespace fe
#endif

+75 -0 inc/external/graph/attr_value.h

@@ -0,0 +1,75 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_GRAPH_ATTR_VALUE_H_
#define INC_EXTERNAL_GRAPH_ATTR_VALUE_H_

#include <map>
#include <memory>
#include <string>
#include <vector>

#include "./ge_error_codes.h"

using std::make_shared;
using std::map;
using std::pair;
using std::string;
using std::to_string;
using std::unique_ptr;
using std::vector;

namespace ge {
class AttrValueImpl;
/*lint -e148*/
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY AttrValue {
public:
using INT = int64_t;
using FLOAT = float;
using STR = std::string;

AttrValue();
~AttrValue() = default;

// GetValue, not list type
template <typename T, typename DT>
graphStatus GetValue(DT &val) const {
T valGet;
auto status = GetValue(valGet);
if (status != GRAPH_SUCCESS) {
return status;
}
val = DT(valGet);
return GRAPH_SUCCESS;
}

template <typename T, typename DT>
static T CreateFrom(DT &&val) {
return val;
}

std::shared_ptr<AttrValueImpl> impl;

private:
#define VALUE_SET_GET_DEC(DT) graphStatus GetValue(DT &val) const;
VALUE_SET_GET_DEC(AttrValue::STR)
VALUE_SET_GET_DEC(AttrValue::INT)
VALUE_SET_GET_DEC(AttrValue::FLOAT)
#undef VALUE_SET_GET_DEC
};
/*lint +e148*/
} // namespace ge
#endif // INC_EXTERNAL_GRAPH_ATTR_VALUE_H_
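
This header only exposes reading: given an AttrValue fetched from an operator's attribute map, GetValue<T> succeeds only when the stored kind matches. A sketch:

ge::graphStatus ReadIntAttr(const ge::AttrValue &attr, int64_t &value) {
  // Returns GRAPH_SUCCESS only when the attribute actually holds an INT.
  return attr.GetValue<ge::AttrValue::INT>(value);
}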

+38 -0 inc/external/graph/ge_error_codes.h

@@ -0,0 +1,38 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_
#define INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_

#include <cstdint>

namespace ge {
#ifdef HOST_VISIBILITY
#define GE_FUNC_HOST_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_HOST_VISIBILITY
#endif
#ifdef DEV_VISIBILITY
#define GE_FUNC_DEV_VISIBILITY __attribute__((visibility("default")))
#else
#define GE_FUNC_DEV_VISIBILITY
#endif

using graphStatus = uint32_t;
const graphStatus GRAPH_FAILED = 0xFFFFFFFF;
const graphStatus GRAPH_SUCCESS = 0;
const graphStatus GRAPH_PARAM_INVALID = 50331649;
} // namespace ge

#endif // INC_EXTERNAL_GRAPH_GE_ERROR_CODES_H_

Some files were not shown because too many files changed in this diff
