Compare commits

...

108 Commits

Author SHA1 Message Date
  i-robot d1e3232154 !2038 bugfix for oms taskdef random change 3 years ago
  gengchao4@huawei.com 9d94a5fef5 bugfix for om's taskdef random change 3 years ago
  mindspore-ci-bot 59361d83a9 !1470 LinkToPotentialPrecedenceNode c76 4 years ago
  lianghao 1993675125 LinkToPotentialPrecedenceNode 4 years ago
  mindspore-ci-bot a205af43fc !1324 Fix bug of single_op ageing. 4 years ago
  unknown 63e8d1e291 Fix bug of single_op ageing. 4 years ago
  mindspore-ci-bot 5d094c7f72 !1289 modify static depends 4 years ago
  wxl 11b77d9daf modify static depends 4 years ago
  mindspore-ci-bot a8c137e5eb !1058 kTaskNumPerHcclNode 4 years ago
  lianghao 3dd23c428e kTaskNumPerHcclNode 4 years ago
  mindspore-ci-bot d98d77371e !1045 move end_task after assignadd on iterator_loop case 4 years ago
  wangxiaotian22 2b0464ad81 move end_task before active stream when bpfp set by env 4 years ago
  mindspore-ci-bot 145d6a97d6 !1011 move end_task after assignadd on iterator_loop case 4 years ago
  wangxiaotian22 c72ba8ad66 parser update 4 years ago
  wangxiaotian22 d8db67d06f add metadef update 4 years ago
  wangxiaotian22 9712b297b7 move end_task after assignadd on iterator_loop case 4 years ago
  mindspore-ci-bot 885b3a6b7f !951 gensessionid add pid prefix 4 years ago
  wangxiaotian22 ee5c71962d gensessionid add pid prefix 4 years ago
  mindspore-ci-bot c14c6a87cb !947 change mult batch to switchn 4 years ago
  wjm 81cd9527aa fix error 4 years ago
  wjm d3515b1624 change mult batch to switchn 4 years ago
  mindspore-ci-bot cb019693b1 !945 Add cc_task task_info log. 4 years ago
  mindspore-ci-bot 6529491d90 !943 Add keep_dtype attribute 4 years ago
  unknown 4318596b9a Add cc_task task_info log. 4 years ago
  lwx897429 970745eedf Add keep_dtype attribute 4 years ago
  mindspore-ci-bot 89cccaeb3c !907 modify dump task proto in c76 4 years ago
  zhou_chao1993 7f341ab53a modify dump_task proto in c76 4 years ago
  mindspore-ci-bot ae91b07e6a !917 Migration subgraph Const Node 4 years ago
  mindspore-ci-bot 267ddd9801 !916 Bugfix: check precision loss when cast from int64 to bool 4 years ago
  wjm 5bc603e52d fix error 4 years ago
  zhaoxinxin a608eee4e2 modified: ge/graph/common/transop_util.cc 4 years ago
  mindspore-ci-bot 1218a757c5 !898 Add submodelId in dynamic shape 4 years ago
  taoxiangdong fadd5d1874 Add submodelId in dynamic shape 4 years ago
  mindspore-ci-bot 1cd83211d6 !891 fix l2 buffer error 4 years ago
  mindspore-ci-bot 3eeb8a9c97 !893 remove interface aclgrphInfershapeAndType 4 years ago
  mindspore-ci-bot 0126007d89 !888 add SwitchDeadBranchElimination & MergePass in graph prepare 4 years ago
  wxl a69806eee1 remove interface aclgrphInfershapeAndType 4 years ago
  wjm aac7897a44 fix l2 buffer error 4 years ago
  chenyemeng 2b90729519 add SwitchDeadBranchElimination & MergePass in graph prepare 4 years ago
  mindspore-ci-bot d5c6e8146b !872 Log macro error in windows 4 years ago
  taoxiangdong 850f6efb29 Log print macro error 4 years ago
  mindspore-ci-bot 65509ee0f0 !853 add whole graph optimize 4 years ago
  gengchao4@huawei.com 8ad6d4b463 add whole graph optimize 4 years ago
  mindspore-ci-bot 103aa22616 !839 fixed issue of repeated profile subscription 4 years ago
  mindspore-ci-bot 11b6f47be6 !811 modify p2p addr assigner bug in c76 4 years ago
  mindspore-ci-bot 501e184095 !828 Free mem before return 4 years ago
  taoxiangdong c8cc205f33 Free memory before return 4 years ago
  lwx897429 d30dd18e09 fixed issue of repeated profile subscription 4 years ago
  mindspore-ci-bot 63cc95c5e5 !837 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  l00444296 b6aa9c0e4d Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  mindspore-ci-bot 032f9d1f07 !795 Parse training trace switch in profstart func 4 years ago
  mindspore-ci-bot dfd2314793 !782 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  mindspore-ci-bot 69e7d5bf64 !789 fix question that release all loaded model memory when memory is not enough 4 years ago
  mindspore-ci-bot c451c30026 !797 fix dynamic aipp error 4 years ago
  mindspore-ci-bot 61b2de9c38 !786 dump 4 years ago
  mindspore-ci-bot a552edfd11 !779 Feature: delete is_load_profiling_ reset to false 4 years ago
  wjm ba745a12d3 fix 4 years ago
  zhou_chao1993 0554dd5942 modify p2p addr assigner bug 4 years ago
  weiyang 628162c7b0 dump 4 years ago
  wjm a9b4cf400a fix dynamic aipp error 4 years ago
  taoxiangdong 5e85506711 Parse training trace on profstart 4 years ago
  mindspore-ci-bot 5a8206cf7d !793 license update, mentioning usage of tensorflow and caffe code 4 years ago
  yanghaoran fba4643a47 license update, mentioning usage of tensorflow and caffe code 4 years ago
  wxl 2504b6b7b9 bugfix 4 years ago
  l00444296 50fdb59274 Feature: delete compress_weight_conf para of aclgrphParse interface 4 years ago
  l00444296 d1eb560616 Feature: delete is_load_profiling_ reset to false 4 years ago
  mindspore-ci-bot 72eef81746 !767 for performance 4 years ago
  mindspore-ci-bot 856fb4419a !724 Feature: repair dynamic_stitch_kernel folding bug 4 years ago
  mindspore-ci-bot b3cfade65e !771 op_compiler_cache_dir 4 years ago
  mindspore-ci-bot 35983c7c38 !761 Check aicpu op type 4 years ago
  mindspore-ci-bot 4fe214984d !755 errorcode 4 years ago
  lianghao bb6de73c97 op_compiler_cache_dir 4 years ago
  weiyang 43c1e02265 perf 4 years ago
  taoxiangdong 546e9f7cf9 Check aicpu op type 4 years ago
  mindspore-ci-bot c2fb4adbce !760 device os log missing 4 years ago
  taoxiangdong c485a99932 device os log missing 4 years ago
  mindspore-ci-bot 5de4cd5479 !733 decrease om size 4 years ago
  mindspore-ci-bot 30000dd4e7 !747 fix case plugin error 4 years ago
  mindspore-ci-bot 60ceda422f !752 Fix storage bug. 4 years ago
  weiyang 48585c78f0 errorcode 4 years ago
  mindspore-ci-bot 51314c970b !751 Fix bug of modify output shape to -2. 4 years ago
  unknown 17428ef7a8 Fix storage bug. 4 years ago
  unknown 4a315e1d4f Fix bug of modify output shape to -2. 4 years ago
  wjm c694a907e2 fix case plugin 4 years ago
  mindspore-ci-bot 8bb847b429 !746 delete invalid comment 4 years ago
  wqtshg acae7cfaea delete invalid comment 4 years ago
  mindspore-ci-bot 3efd05e1c4 !744 update c76 code 4 years ago
  wqtshg 7f542c2b68 update c76 code ut 4 years ago
  wqtshg 0b6354215a update c76 ut 4 years ago
  wqtshg 8a8a42cf03 update c76 code ut 4 years ago
  wqtshg 8e87e4b7a5 update ge ut 4 years ago
  wqtshg ea477de6eb update test 4 years ago
  wqtshg 0f36063e8c update ut 4 years ago
  wqtshg 1ac3bff4af update c76 submodule 4 years ago
  lianghao 2898b2d83c decrease om size 4 years ago
  wqtshg 455f21252f update c76 log_cpp 4 years ago
  l00444296 48973d4ea1 Feature: repair dynamic_stitch_kernel folding bug 4 years ago
  wqtshg 4ac0f69204 add c76 LOG_CPP 4 years ago
  wqtshg d662b5e84e update c76 submodule 4 years ago
  wqtshg e38e5e06a2 update c76 cmake 4 years ago
  wqtshg eea696c45b update c76 code 4 years ago
  wqtshg eaeaec68ff update slog to alog 4 years ago
  wqtshg 86bb779cee update c76 submodule 4 years ago
  计晨 fe5db33358 !712 update c76 code 4 years ago
  wqtshg 5cc51efc74 update c76 code and submodule 4 years ago
  计晨 ca855a5bf7 !707 update c76 code 4 years ago
  wqtshg 16758ee2b1 update c76 code 4 years ago
  wqtshg 5d043adbca update c76 code 4 years ago
100 changed files with 1944 additions and 1291 deletions
Split View
  1. +2
    -2
      .gitmodules
  2. +6
    -6
      CMakeLists.txt
  3. +73
    -0
      Third_Party_Open_Source_Software_Notice
  4. +6
    -8
      build.sh
  5. +1
    -1
      cmake/FindModule.cmake
  6. +0
    -1
      cmake/external_libs/gflags.cmake
  7. +2
    -6
      cmake/external_libs/gtest.cmake
  8. +7
    -12
      cmake/external_libs/json.cmake
  9. +1
    -5
      cmake/external_libs/onnx.cmake
  10. +0
    -1
      cmake/external_libs/protobuf_shared.cmake
  11. +6
    -1
      cmake/external_libs/protobuf_static.cmake
  12. +0
    -1
      cmake/external_libs/protoc.cmake
  13. +2
    -11
      cmake/external_libs/securec.cmake
  14. +1
    -0
      cmake/intf_pub_linux.cmake
  15. +2
    -0
      ge/CMakeLists.txt
  16. +19
    -0
      ge/client/ge_api.cc
  17. +2
    -2
      ge/common/CMakeLists.txt
  18. +1
    -14
      ge/common/dump/dump_op.cc
  19. +2
    -2
      ge/common/ge/tbe_plugin_manager.cc
  20. +1
    -1
      ge/common/profiling/ge_profiling.cc
  21. +12
    -12
      ge/common/profiling/profiling_manager.cc
  22. +0
    -2
      ge/common/proto/op_mapping_info.proto
  23. +8
    -0
      ge/common/proto/tensorflow/attr_value.proto
  24. +8
    -0
      ge/common/proto/tensorflow/function.proto
  25. +8
    -0
      ge/common/proto/tensorflow/graph.proto
  26. +8
    -0
      ge/common/proto/tensorflow/graph_library.proto
  27. +8
    -0
      ge/common/proto/tensorflow/node_def.proto
  28. +8
    -0
      ge/common/proto/tensorflow/op_def.proto
  29. +8
    -0
      ge/common/proto/tensorflow/resource_handle.proto
  30. +8
    -0
      ge/common/proto/tensorflow/tensor.proto
  31. +8
    -0
      ge/common/proto/tensorflow/tensor_shape.proto
  32. +8
    -0
      ge/common/proto/tensorflow/types.proto
  33. +8
    -0
      ge/common/proto/tensorflow/versions.proto
  34. +1
    -1
      ge/executor/CMakeLists.txt
  35. +95
    -40
      ge/executor/ge_executor.cc
  36. +1
    -0
      ge/executor/proto/dump_task.proto
  37. +0
    -2
      ge/executor/proto/op_mapping_info.proto
  38. +3
    -2
      ge/ge_local_engine/engine/host_cpu_engine.cc
  39. +40
    -20
      ge/generator/ge_generator.cc
  40. +2
    -1
      ge/graph/build/memory/graph_mem_assigner.cc
  41. +53
    -0
      ge/graph/build/model_builder.cc
  42. +6
    -0
      ge/graph/build/model_builder.h
  43. +1
    -1
      ge/graph/build/stream_allocator.cc
  44. +8
    -6
      ge/graph/build/stream_graph_optimizer.cc
  45. +9
    -7
      ge/graph/build/task_generator.cc
  46. +4
    -1
      ge/graph/common/transop_util.cc
  47. +69
    -12
      ge/graph/load/graph_loader.cc
  48. +7
    -1
      ge/graph/load/graph_loader.h
  49. +1
    -6
      ge/graph/load/new_model_manager/data_dumper.cc
  50. +151
    -144
      ge/graph/load/new_model_manager/davinci_model.cc
  51. +22
    -33
      ge/graph/load/new_model_manager/davinci_model.h
  52. +267
    -54
      ge/graph/load/new_model_manager/model_manager.cc
  53. +11
    -4
      ge/graph/load/new_model_manager/model_manager.h
  54. +16
    -9
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  55. +2
    -0
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h
  56. +90
    -77
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  57. +3
    -3
      ge/graph/load/new_model_manager/task_info/kernel_task_info.h
  58. +30
    -2
      ge/graph/manager/graph_manager.cc
  59. +3
    -0
      ge/graph/manager/graph_mem_allocator.cc
  60. +33
    -0
      ge/graph/optimize/graph_optimize.cc
  61. +3
    -0
      ge/graph/optimize/graph_optimize.h
  62. +33
    -0
      ge/graph/passes/atomic_addr_clean_pass.cc
  63. +8
    -0
      ge/graph/passes/atomic_addr_clean_pass.h
  64. +4
    -8
      ge/graph/passes/dynamic_single_op_reset_shape_pass.cc
  65. +1
    -1
      ge/graph/passes/dynamic_single_op_reset_shape_pass.h
  66. +53
    -45
      ge/graph/passes/multi_batch_clone_pass.cc
  67. +11
    -11
      ge/graph/passes/multi_batch_clone_pass.h
  68. +231
    -315
      ge/graph/passes/subgraph_const_migration_pass.cc
  69. +42
    -42
      ge/graph/passes/subgraph_const_migration_pass.h
  70. +8
    -4
      ge/graph/passes/switch_to_stream_switch_pass.cc
  71. +44
    -0
      ge/graph/preprocess/graph_preprocess.cc
  72. +6
    -7
      ge/graph/preprocess/multi_batch_copy_graph.cc
  73. +10
    -1
      ge/host_kernels/dynamic_stitch_kernel.cc
  74. +22
    -34
      ge/hybrid/executor/node_state.cc
  75. +1
    -2
      ge/hybrid/executor/node_state.h
  76. +8
    -1
      ge/hybrid/executor/subgraph_executor.cc
  77. +12
    -16
      ge/hybrid/executor/worker/execution_engine.cc
  78. +18
    -103
      ge/hybrid/executor/worker/shape_inference_engine.cc
  79. +0
    -4
      ge/hybrid/executor/worker/shape_inference_engine.h
  80. +27
    -4
      ge/hybrid/model/hybrid_model_builder.cc
  81. +1
    -0
      ge/hybrid/model/hybrid_model_builder.h
  82. +34
    -57
      ge/hybrid/model/node_item.cc
  83. +0
    -6
      ge/hybrid/model/node_item.h
  84. +0
    -10
      ge/hybrid/node_executor/aicore/aicore_node_executor.cc
  85. +0
    -11
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  86. +14
    -2
      ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc
  87. +0
    -38
      ge/hybrid/node_executor/task_context.cc
  88. +0
    -10
      ge/hybrid/node_executor/task_context.h
  89. +15
    -4
      ge/ir_build/atc_ir_common.cc
  90. +2
    -1
      ge/ir_build/atc_ir_common.h
  91. +0
    -36
      ge/ir_build/ge_ir_build.cc
  92. +1
    -0
      ge/offline/CMakeLists.txt
  93. +116
    -0
      ge/offline/keep_dtype_option.cc
  94. +26
    -0
      ge/offline/keep_dtype_option.h
  95. +16
    -0
      ge/offline/main.cc
  96. +3
    -0
      ge/offline/module.mk
  97. +2
    -2
      ge/omm/csa_interact.cc
  98. +8
    -0
      ge/proto/caffe/caffe.proto
  99. +2
    -0
      ge/proto/dump_task.proto
  100. +0
    -2
      ge/proto/op_mapping_info.proto

+ 2
- 2
.gitmodules View File

@@ -1,8 +1,8 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
branch = development
branch = r1.2.0
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
branch = development
branch = r1.2.0

+ 6
- 6
CMakeLists.txt View File

@@ -52,10 +52,10 @@ if (ENABLE_OPEN_SRC)
include(cmake/FindModule.cmake)
include(cmake/intf_pub_linux.cmake)

# for CPU/GPU mode, find c_sec and slog from local prebuild
# for CPU/GPU mode, find c_sec and alog from local prebuild
#if(NOT ENABLE_D AND NOT GE_ONLY)
# set(GE_PREBUILD_PATH ${GE_CODE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
# find_module(slog libslog.so ${GE_PREBUILD_PATH})
# find_module(slog libalog.so ${GE_PREBUILD_PATH})
# if D_LINK_PATH is set in environment variables, search libraries in given path
if(DEFINED ENV{D_LINK_PATH})
# D_LINK_PATH is set
@@ -72,7 +72,7 @@ if (ENABLE_OPEN_SRC)
endif()
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(slog libalog.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
@@ -88,7 +88,7 @@ if (ENABLE_OPEN_SRC)
elseif(ENABLE_GE_COV OR ENABLE_GE_UT)
add_subdirectory(tests)
else()
find_module(slog libslog.so ${ASCEND_ATC_DIR})
find_module(slog libalog.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
@@ -154,7 +154,7 @@ elseif (ENABLE_D OR ENABLE_ACL)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

@@ -174,7 +174,7 @@ elseif(ENABLE_MS_TESTCASES)
include(cmake/intf_pub_linux.cmake)

# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(slog libalog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})



+ 73
- 0
Third_Party_Open_Source_Software_Notice View File

@@ -458,3 +458,76 @@ Copyright (c) Facebook Inc. and Microsoft Corporation.

License: MIT License
Please see above.



Software: caffe 1.0

License: BSD 2-Clause License

Open Source Software Licensed Under the BSD 2-Clause License

GraphEngine uses source code files from caffe so as to support model format conversion from caffe model to GraphEngine model.
Please see below for the full list of source code files from caffe that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. caffe.proto master
All contributions by the University of California:
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
All rights reserved.


Terms of the BSD 2-Clause License:
--------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Software: tensorflow 1.15.0

License: Apache-2.0 License

Open Source Software Licensed Under the Apache-2.0 License


GraphEngine uses source code files from tensorflow so as to support model format conversion from tensorflow model to GraphEngine model.
Please see below for the full list of source code files from tensorflow that are used by GraphEngine.
The below software in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications"). All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
----------------------------------------------------------------------------------------
1. attr_value.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

2. function.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

3. graph.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

4. node_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

5. op_def.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

6. resource_handle.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

7. tensor.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

8. tensor_shape.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

9. types.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

10. versions.proto master
Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Terms of the Apache-2.0 License:
Please see above.

+ 6
- 8
build.sh View File

@@ -224,14 +224,12 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
# fi

# if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
echo "Generating coverage statistics, please wait..."
cd ${BASEPATH}
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
# echo "Generating coverage statistics, please wait..."
# cd ${BASEPATH}
# rm -rf ${BASEPATH}/cov
# mkdir ${BASEPATH}/cov
# gcovr -r ./ --exclude 'third_party' --exclude 'build' --exclude 'tests' --exclude 'prebuild' --exclude 'inc' --print-summary --html --html-details -d -o cov/index.html
# fi
fi

# generate output package in tar form, including ut/st libraries/executables


+ 1
- 1
cmake/FindModule.cmake View File

@@ -21,7 +21,7 @@ function(find_module module name)
if ("${${module}_LIBRARY_DIR}" STREQUAL "${module}_LIBRARY_DIR-NOTFOUND")
message(FATAL_ERROR "${name} not found in ${path}")
endif()
add_library(${module} SHARED IMPORTED)
set_target_properties(${module} PROPERTIES
IMPORTED_LOCATION ${${module}_LIBRARY_DIR}


+ 0
- 1
cmake/external_libs/gflags.cmake View File

@@ -23,7 +23,6 @@ ExternalProject_Add(gflags_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../../third_party/gflags/src/gflags-2.2.2
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gflags_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gflags <SOURCE_DIR>
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 2
- 6
cmake/external_libs/gtest.cmake View File

@@ -10,10 +10,7 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_gtest/release-1.8.0.tar.gz")
set(MD5 "")
elseif (ENABLE_GITEE)
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/googletest/repository/archive/release-1.8.0.tar.gz")
set(MD5 "")
else()
@@ -25,9 +22,8 @@ set (gtest_CXXFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-
set (gtest_CFLAGS "-D_GLIBCXX_USE_CXX11_ABI=0 -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-all -Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(gtest_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -DCMAKE_CXX_FLAGS=${gtest_CXXFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/gtest <SOURCE_DIR>
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
-DBUILD_TESTING=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_MACOSX_RPATH=TRUE -Dgtest_disable_pthreads=ON
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install
EXCLUDE_FROM_ALL TRUE


+ 7
- 12
cmake/external_libs/json.cmake View File

@@ -5,24 +5,19 @@ endif()
include(ExternalProject)

set(JSON_SRC_DIR ${CMAKE_BINARY_DIR}/opensrc/json/include)
if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/ge_nlohmann_json/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#elseif (ENABLE_GITEE)
#if (ENABLE_GITEE)
# set(REQ_URL "https://gitee.com/mirrors/JSON-for-Modern-CPP/repository/archive/v3.6.1.zip")
# set(MD5 "5bda78ce308e6cfcf614dcf1d5ff27a7")
#set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
endif ()
# set(JSON_INCLUDE_DIR "${JSON_SRC_DIR}/include")
#else()
set(REQ_URL "https://github.com/nlohmann/json/releases/download/v3.6.1/include.zip")
set(MD5 "0dc903888211db3a0f170304cd9f3a89")
set(JSON_INCLUDE_DIR ${JSON_SRC_DIR})
#endif ()
ExternalProject_Add(json_build
URL ${REQ_URL}
#URL /home/txd/workspace/cloud_code/pkg/include.zip
SOURCE_DIR ${JSON_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""


+ 1
- 5
cmake/external_libs/onnx.cmake View File

@@ -6,10 +6,7 @@ set(ONNX_PROTO_DIR ${CMAKE_BINARY_DIR}/onnx)
set(ONNX_PROTO_FILE ${ONNX_PROTO_DIR}/onnx.proto)
file(MAKE_DIRECTORY ${ONNX_PROTO_DIR})

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/onnx/onnx-1.6.0.tar.gz")
set(MD5 "512f2779d6215d4a36f366b6b9acdf1e")
elseif (ENABLE_GITEE)
if (ENABLE_GITEE)
set(REQ_URL "https://gitee.com/mirrors/ONNX/repository/archive/v1.6.0.tar.gz")
set(MD5 "1bdbcecdd68ea8392630467646776e02")
else()
@@ -22,7 +19,6 @@ ExternalProject_Add(onnx
#URL /home/txd/workspace/cloud_code/pkg/onnx-1.6.0.tar.gz
#URL_HASH SHA256=3b88c3fe521151651a0403c4d131cb2e0311bd28b753ef692020a432a81ce345
#SOURCE_DIR ${ONNX_SRC_DIR}
TLS_VERIFY OFF
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
#INSTALL_COMMAND ""


+ 0
- 1
cmake/external_libs/protobuf_shared.cmake View File

@@ -26,7 +26,6 @@ set(protobuf_CXXFLAGS "-Wno-maybe-uninitialized -Wno-unused-parameter -fPIC -fst
set(protobuf_LDFLAGS "-Wl,-z,relro,-z,now,-z,noexecstack")
ExternalProject_Add(protobuf_build
URL ${REQ_URL}
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-Dprotobuf_WITH_ZLIB=OFF
-DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR}


+ 6
- 1
cmake/external_libs/protobuf_static.cmake View File

@@ -1,3 +1,7 @@
if (HAVE_PROTOBUF_STATIC)
return()
endif()

include(ExternalProject)
include(GNUInstallDirs)
#set(CMAKE_INSTALL_PREFIX ${GE_CODE_DIR}/output)
@@ -27,7 +31,6 @@ ExternalProject_Add(protobuf_static_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${METADEF_DIR}/../../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -58,3 +61,5 @@ include_directories(${PROTOBUF_STATIC_PKG_DIR}/include)
endif ()

add_dependencies(ascend_protobuf_static protobuf_static_build)

set(HAVE_PROTOBUF_STATIC TRUE)

+ 0
- 1
cmake/external_libs/protoc.cmake View File

@@ -30,7 +30,6 @@ ExternalProject_Add(protoc_build
URL ${REQ_URL}
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../third_party/protobuf/src/protobuf-3.8.0
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND} -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_CXX_FLAGS=${protobuf_CXXFLAGS} -DCMAKE_CXX_LDFLAGS=${protobuf_LDFLAGS} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/protoc <SOURCE_DIR>/cmake
BUILD_COMMAND $(MAKE)
INSTALL_COMMAND $(MAKE) install


+ 2
- 11
cmake/external_libs/securec.cmake View File

@@ -10,20 +10,11 @@ if ((${CMAKE_INSTALL_PREFIX} STREQUAL /usr/local) OR
message(STATUS "No install prefix selected, default to ${CMAKE_INSTALL_PREFIX}.")
endif()

if (GE_PB_PKG)
set(REQ_URL "${GE_PB_PKG}/libs/securec/v1.1.10.tar.gz")
set(MD5 "")
else()
set(REQ_URL "https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz")
set(MD5 "")
endif ()

ExternalProject_Add(c_sec_build
URL ${REQ_URL}
#URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
URL https://gitee.com/openeuler/libboundscheck/repository/archive/v1.1.10.tar.gz
#URL /home/txd/workspace/linux_cmake/pkg/protobuf-3.8.0.tar.gz
#SOURCE_DIR ${GE_CODE_DIR}/../libc_sec
PATCH_COMMAND patch -p1 < ${GE_CODE_DIR}/metadef/third_party/patch/securec/0001-add-securec-cmake-script.patch
TLS_VERIFY OFF
CONFIGURE_COMMAND ${CMAKE_COMMAND}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}


+ 1
- 0
cmake/intf_pub_linux.cmake View File

@@ -16,6 +16,7 @@ target_compile_definitions(intf_pub INTERFACE
$<$<CONFIG:Debug>:CFG_BUILD_DEBUG>
WIN64=1
LINUX=0
LOG_CPP
)
target_link_options(intf_pub INTERFACE
-Wl,-z,relro


+ 2
- 0
ge/CMakeLists.txt View File

@@ -620,6 +620,7 @@ target_compile_definitions(ge_runner PRIVATE
FMK_SUPPORT_DUMP
DAVINCI_CLOUD
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_runner PRIVATE
@@ -687,6 +688,7 @@ target_compile_definitions(ge_compiler PRIVATE
FMK_HOST_INFER
COMPILE_OMG_PACKAGE
google=ascend_private
$<$<STREQUAL:${ENABLE_OPEN_SRC},True>:ONLY_COMPILE_OPEN_SRC>
)

target_compile_options(ge_compiler PRIVATE


+ 19
- 0
ge/client/ge_api.cc View File

@@ -32,6 +32,9 @@
#include "graph/common/ge_call_wrapper.h"
#include "register/op_registry.h"
#include "common/ge/tbe_plugin_manager.h"
#ifndef ONLY_COMPILE_OPEN_SRC
#include "toolchain/plog.h"
#endif

using domi::OpRegistry;
using std::map;
@@ -129,6 +132,11 @@ Status GEInitializeImpl(const std::map<string, string> &options) {

// Initialize GE, prepare for execution, call GELib::Initialize
Status GEInitialize(const std::map<string, string> &options) {
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(options);
}

@@ -143,6 +151,11 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
std::string val = option.second.GetString();
str_options[key] = val;
}
#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportInitialize() != SUCCESS) {
GELOGW("Dlog report device log initialize failed.");
}
#endif
return GEInitializeImpl(str_options);
}

@@ -187,6 +200,12 @@ Status GEFinalize() {
// to avoid memory fragment, use malloc_trim to back free stack to system
malloc_trim(0);

#ifndef ONLY_COMPILE_OPEN_SRC
if (DlogReportFinalize() != SUCCESS) {
GELOGW("Dlog report device log finalize failed.");
}
#endif

GELOGT(TRACE_STOP, "GEFinalize finished");
return ret;
}


+ 2
- 2
ge/common/CMakeLists.txt View File

@@ -12,7 +12,7 @@ set(PROTO_LIST
"${METADEF_DIR}/proto/tensorflow/tensor.proto"
"${METADEF_DIR}/proto/tensorflow/tensor_shape.proto"
"${METADEF_DIR}/proto/tensorflow/types.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
"${METADEF_DIR}/proto/tensorflow/versions.proto"
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
@@ -163,7 +163,7 @@ target_include_directories(ge_common_static PRIVATE

target_link_libraries(ge_common_static PRIVATE
$<BUILD_INTERFACE:intf_pub>
ascend_protobuf
ascend_protobuf_static
json
c_sec
$<$<NOT:$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-lrt>


+ 1
- 14
ge/common/dump/dump_op.cc View File

@@ -94,9 +94,6 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -121,9 +118,6 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -220,15 +214,8 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());
uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}

aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {


+ 2
- 2
ge/common/ge/tbe_plugin_manager.cc View File

@@ -184,7 +184,7 @@ void TBEPluginManager::LoadCustomOpLib() {
std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
@@ -192,7 +192,7 @@ void TBEPluginManager::LoadCustomOpLib() {
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
domi::OpRegistry::Instance()->Register(reg_data);
}
}
}


+ 1
- 1
ge/common/profiling/ge_profiling.cc View File

@@ -182,7 +182,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}


+ 12
- 12
ge/common/profiling/profiling_manager.cc View File

@@ -89,13 +89,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling by env
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 };
is_load_profiling_ = false; // Change in ProfInit
is_execute_profiling_ = false;

if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
// enable profiling by ge option
if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
options.profiling_options.size()) != EOK) {
if (strncpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy profiling_options failed.");
return INTERNAL_ERROR;
}
@@ -125,11 +124,12 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return ge::PARAM_INVALID;
}

if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(),
sizeof(options.job_id.c_str())) != EOK) {
if (strncpy_s(prof_conf.jobId, MSPROF_OPTIONS_DEF_LEN_MAX, options.job_id.c_str(),
MSPROF_OPTIONS_DEF_LEN_MAX - 1) != EOK) {
GELOGE(INTERNAL_ERROR, "copy job_id failed.");
return INTERNAL_ERROR;
}
GELOGI("Job id: %s, original job id: %s.", prof_conf.jobId, options.job_id.c_str());
#endif
return ge::SUCCESS;
}
@@ -159,6 +159,7 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
is_training_trace_ = true;
} catch (...) {
GELOGE(FAILED, "Json prof_conf options is invalid.");
return ge::PARAM_INVALID;
@@ -212,16 +213,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
uint32_t block_dim = task.block_dim;
uint32_t task_id = task.task_id;
uint32_t stream_id = task.stream_id;
std::string shape_type = task.shape_type;
int64_t cur_iter_num = task.cur_iter_num;
data = model_name.append(" ")
.append(op_name).append(" ")
.append(std::to_string(block_dim)).append(" ")
.append(std::to_string(block_dim).append(" ")
.append(std::to_string(task_id)).append(" ")
.append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append(" ")
.append(shape_type).append(" ")
.append(std::to_string(cur_iter_num)).append("\n");
.append(std::to_string(model_id)).append("\n"));

ReporterData reporter_data{};
reporter_data.deviceId = device_id;
@@ -632,6 +629,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
uint64_t module, const std::map<std::string, std::string> &config_para) {
#ifdef DAVINCI_SUPPORT_PROFILING
std::lock_guard<std::mutex> lock(mutex_);
uint64_t training_trace_mask = module & PROF_TRAINING_TRACE_MASK;
if (training_trace_mask == PROF_TRAINING_TRACE_MASK) {
is_training_trace_ = true;
}
int32_t device_num = 0;
vector<int32_t> device_list;
if (ProfParseParam(config_para, device_num, device_list) != SUCCESS) {
@@ -846,7 +847,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
return;
}
}
return;
}



+ 0
- 2
ge/common/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 8
- 0
ge/common/proto/tensorflow/attr_value.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/function.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/graph_library.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/node_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/op_def.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/resource_handle.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/tensor_shape.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

// Protocol buffer representing the shape of tensors.

syntax = "proto3";


+ 8
- 0
ge/common/proto/tensorflow/types.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 8
- 0
ge/common/proto/tensorflow/versions.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software TensorFlow, version 1.15.0 https://github.com/tensorflow/tensorflow
*
* This file is included by GraphEngine so as to support model format conversion from tensorflow model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto3";

package domi.tensorflow;


+ 1
- 1
ge/executor/CMakeLists.txt View File

@@ -197,7 +197,7 @@ target_include_directories(ge_executor PRIVATE
target_link_libraries(ge_executor PRIVATE
$<BUILD_INTERFACE:intf_pub>
json
ascend_protobuf
ascend_protobuf_static
c_sec
$<$<NOT:$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-lrt>
-ldl


+ 95
- 40
ge/executor/ge_executor.cc View File

@@ -209,33 +209,19 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,

namespace ge {
bool GeExecutor::isInit_ = false;

static void InitOpsProtoManger() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "File path %s is invalid.", path.c_str());
return;
class ModelListenerAdapter : public ModelListener {
public:
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
std::vector<ge::OutputTensorInfo> &outputs) {
if (listener == nullptr) {
GELOGE(ge::FAILED, "listener is null.");
return FAILED;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}

GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
}

std::shared_ptr<ge::ModelListener> listener;
};

GeExecutor::GeExecutor() {}

@@ -246,16 +232,6 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
if (initHostCpuEngineStatus != SUCCESS) {
GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
return initHostCpuEngineStatus;
}

InitOpsProtoManger();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
@@ -560,6 +536,60 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}

// Load model
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
"File path is invalid. please check your text file '%s'.", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
return ACL_ERROR_GE_LOAD_MODEL;
}
return SUCCESS;
}

Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;

Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModel failed.");
return ACL_ERROR_GE_LOAD_MODEL;
}
return ret;
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@@ -569,7 +599,7 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
Status ret = GraphLoader::DestroyAicpuSessionForInfer(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
return ret;
}

std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model =
@@ -587,11 +617,26 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GraphLoader] DestroyAicpuSessionForInfer failed. model id: %u", model_id);
return ACL_ERROR_GE_UNLOAD_MODEL;
return ret;
}
return SUCCESS;
}

Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}

InputData inputs;
GetDomiInputData(input_data, inputs);
OutputData outputs;
GetDomiOutputData(output_data, outputs);

return GraphExecutor::DataInput(inputs, outputs);
}

// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
@@ -1006,12 +1051,22 @@ Status GeExecutor::GetMemAndWeightSize(const void *model_data, size_t model_size

Status GeExecutor::LoadSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op);
return LoadSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
SingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::LoadDynamicSingleOp(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op);
return LoadDynamicSingleOpV2(model_name, modelData, stream, single_op, 0);
}

Status GeExecutor::LoadDynamicSingleOpV2(const std::string &model_name, const ge::ModelData &modelData, void *stream,
DynamicSingleOp **single_op, const uint64_t model_id) {
return SingleOpManager::GetInstance().GetDynamicOpFromModel(model_name, modelData, stream, single_op, model_id);
}

Status GeExecutor::ExecuteAsync(SingleOp *executor, const std::vector<DataBuffer> &inputs,


+ 1
- 0
ge/executor/proto/dump_task.proto View File

@@ -108,4 +108,5 @@ message DumpData{
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;
}

+ 0
- 2
ge/executor/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


+ 3
- 2
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -39,7 +39,7 @@ namespace {
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
@@ -50,7 +50,8 @@ namespace {
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \


+ 40
- 20
ge/generator/ge_generator.cc View File

@@ -262,10 +262,19 @@ static Status CheckShapeReset(const OpDescPtr &op_desc, bool &change_shape_flag)
change_shape_flag = true;
}
}
for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) {
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i));
GE_CHECK_NOTNULL(output_desc);
// pass scalar output desc
auto dims = output_desc->GetShape().GetDims();
if (dims.size() == kDynamicDimSize && dims[0] == kDynamicDimValue) {
change_shape_flag = true;
}
}
return SUCCESS;
}

static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor> &inputs_dynamic) {
for (auto input : inputs) {
auto input_desc = input.GetTensorDesc();
GeShape shape_ori = input_desc.GetShape();
@@ -280,6 +289,12 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
if (!is_const && shape_ori.GetDims().size() > 0) {
int64_t storage_format = FORMAT_NCHW;
if (ge::AttrUtils::GetInt(desc, ge::ATTR_NAME_STORAGE_FORMAT, storage_format) &&
!ge::AttrUtils::SetListInt(desc, ge::ATTR_NAME_STORAGE_SHAPE, dynamic_shape_dims)) {
GELOGE(FAILED, "Set attr ATTR_NAME_STORAGE_SHAPE fail.");
return FAILED;
}
desc.SetShape(dynamic_shape);
desc.SetShapeRange(dynamic_shape_range);
}
@@ -287,6 +302,7 @@ static void ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTensor>
inputTensor.SetTensorDesc(desc);
inputs_dynamic.push_back(inputTensor);
}
return SUCCESS;
}

class GeGenerator::Impl {
@@ -530,6 +546,24 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
return true;
}

static Status SetModelNameForDump(GeRootModelPtr ge_root_model) {
ModelHelper model_helper;
string model_name = "";
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -538,7 +572,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
GELOGD("Current ctx is null.");
ctx = nullptr;
}

GeRootModelPtr ge_root_model = nullptr;
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
@@ -562,22 +595,11 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
impl_->build_step_.c_str());
return SUCCESS;
}

GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
ret = SetModelNameForDump(ge_root_model);
if (ret != SUCCESS) {
return ret;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");
@@ -586,11 +608,9 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}
return ret;
}

if (ctx != nullptr) {
(void)rtCtxSetCurrent(ctx);
}

return SUCCESS;
}

@@ -684,8 +704,8 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
if (CheckShapeReset(op_desc, dynamic_flag) == SUCCESS && dynamic_flag) {
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
ResetTensorVecShape(inputs, inputs_dynamic);
ResetTensorVecShape(outputs, outputs_dynamic);
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(inputs, inputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else {


+ 2
- 1
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);

if (mem_assigner->GetP2PMemOffset() >= 0) {
if (mem_assigner->GetP2PMemOffset() > 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}
@@ -402,6 +402,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
GE_ERRORLOG_AND_ERRORMSG(FAILED, error.c_str());
return FAILED;
}
continuous_mem_start = iter->second.mem_offset_;
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_data_anchor == nullptr, continue);


+ 53
- 0
ge/graph/build/model_builder.cc View File

@@ -582,9 +582,13 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add TBE Kernels and custom aicpu op bin
std::set<std::string> tbe_name_set;
std::set<std::string> aicpu_name_set;
std::set<std::string> aicpu_op_types;
std::set<std::string> aicpu_tf_op_types;
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
// check aicpu op type
CollectCheckAicpuAttr(node_op_desc, aicpu_op_types, aicpu_tf_op_types);
TBEKernelPtr tbe_kernel = node_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
@@ -606,6 +610,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
}

SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue);
@@ -797,4 +803,51 @@ Status ModelBuilder::CompileSingleOp() {
GE_TIMESTAMP_CALLNUM_END(BatchCompileOp, "GraphBuild::CompileOp");
return ge::SUCCESS;
}

void ModelBuilder::CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::string aicpu_optype;
bool has_attr_check_cpu = ge::AttrUtils::GetStr(op_desc, "needCheckCpu", aicpu_optype);
std::vector<std::string> tf_optypes;
bool has_attr_check_tf = ge::AttrUtils::GetListStr(op_desc, "needCheckTf", tf_optypes);
if (has_attr_check_cpu && !aicpu_optype.empty()) {
aicpu_op_types.insert(aicpu_optype);
}

if (has_attr_check_tf && !tf_optypes.empty()) {
aicpu_tf_op_types.insert(tf_optypes.begin(), tf_optypes.end());
}

return;
}

void ModelBuilder::SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types) {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
if (ge::AttrUtils::GetListStr(&model, "needCheckCpu", aicpu_optype_list)) {
GELOGI("Already have aicpu optype size: %zu", aicpu_optype_list.size());
aicpu_op_types.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(&model, "needCheckTf", aicpu_tf_optype_list)) {
GELOGI("Already have aicpu tf optype size: %zu", aicpu_tf_optype_list.size());
aicpu_tf_op_types.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}

// reset list with set
aicpu_optype_list.assign(aicpu_op_types.begin(), aicpu_op_types.end());
aicpu_tf_optype_list.assign(aicpu_tf_op_types.begin(), aicpu_tf_op_types.end());
GELOGI(
"Check Aicpu op types ComputeGraph: %s aicpu_op_types: %zu, aicpu_optype_list: %zu, aicpu_tf_op_types: %zu, "
"aicpu_tf_optype_list:%zu.",
compute_graph_->GetName().c_str(), aicpu_op_types.size(), aicpu_optype_list.size(), aicpu_tf_op_types.size(),
aicpu_tf_optype_list.size());
GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckCpu", aicpu_optype_list), return,
"Set attr needCheckCpu fail.");

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&model, "needCheckTf", aicpu_tf_optype_list), return,
"Set attr needCheckTf fail.");
return;
}
} // namespace ge

+ 6
- 0
ge/graph/build/model_builder.h View File

@@ -83,6 +83,12 @@ class ModelBuilder {

Status CompileSingleOp();

void CollectCheckAicpuAttr(const OpDescPtr &op_desc, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;


+ 1
- 1
ge/graph/build/stream_allocator.cc View File

@@ -36,7 +36,7 @@ using std::vector;
namespace {
const uint32_t kMaxSwitchStreamNum = 1;
const int64_t kTaskNumPerNormalNode = 3;
const int64_t kTaskNumPerHcclNode = 200;
const int64_t kTaskNumPerHcclNode = 245;
const char *const kTrueStr = "true";
const char *const kFalseStr = "false";



+ 8
- 6
ge/graph/build/stream_graph_optimizer.cc View File

@@ -66,13 +66,13 @@ bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &com
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
GELOGD("Node %s[%s] has no batch_label, subgraph %s, stream id: %ld ", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}

GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
GELOGD("Node %s in subgraph %s stream id: %ld, batch_label: %s, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, batch_label.c_str(), comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
@@ -126,12 +126,14 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
run_context.graphStreamList.size());
return FAILED;
}

run_context.stream = run_context.graphStreamList[stream_id];
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());

for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);


+ 9
- 7
ge/graph/build/task_generator.cc View File

@@ -54,9 +54,10 @@ const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
const uint64_t kProfilingArStartLogid = 3;
const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 255;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
const char *const kKernelInfoNameHccl = "ops_kernel_info_hccl";
} // namespace
namespace ge {
TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -348,14 +349,15 @@ Status TaskGenerator::GenerateTask(RunContext &run_context, ComputeGraphPtr &gra
}

// Reset stream id to ge stream id, as graph load must use ge stream to reassign stream
void *ops_kernel_info_store_ptr = kernel_info_store.get();
for (size_t idx = task_list_size_before; idx < task_list_size_after; ++idx) {
task_def_list[idx].set_stream_id(static_cast<uint32_t>(stream_id));
op_name_map[idx] = name;
// Set opsKernelInfoStorePtr and op_index, the two fields be use in DistributeTask and InitTaskInfo
TaskDef *task_def_ptr = &task_def_list[idx];
GE_CHECK_NOTNULL(task_def_ptr);
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr));
// Set opsKernelInfoStorePtr for hccl which will be use in DistributeTask and InitTaskInfo
if (op_kernel_lib_name == kKernelInfoNameHccl) {
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(kernel_info_store.get()));
}
}
GELOGD("Call %s to generate node[name:%s(%s), id:%ld, stream_id:%ld] task finished, generate %zu task(s).",
op_kernel_lib_name.c_str(), name.c_str(), type.c_str(), op_id, stream_id,
@@ -567,7 +569,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continue;
}
string op_type = op_desc->GetType();
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) {
if ((!is_single_stream && !op_desc->GetSubgraphInstanceNames().empty()) || separator_types.count(op_type) != 0) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);
@@ -676,7 +678,7 @@ Status TaskGenerator::AutoFindBpOpIndex(const ComputeGraphPtr &graph, ProfilingP
}
}
if (graph->GetNeedIteration()) {
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
profiling_point.end_index.insert(current_idx);
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
op_desc->GetName().c_str(), current_idx);
@@ -773,7 +775,7 @@ Status TaskGenerator::FindBpOfEnv(const ComputeGraphPtr &graph, const std::strin
}

if (graph->GetNeedIteration()) {
if (op_desc->GetName() == NODE_NAME_NET_OUTPUT + '_' + NODE_NAME_STREAM_SWITCH + "_StreamActive") {
if (op_desc->GetName() == NODE_NAME_FLOWCTRL_LOOP_ASSIGNADD) {
profiling_point.end_index.insert(current_idx);
GELOGI("Iter end name %s, idx %u, from Node_Output_IteratorCtrl_StreamSwitch_StreamActive",
op_desc->GetName().c_str(), current_idx);


+ 4
- 1
ge/graph/common/transop_util.cc View File

@@ -23,7 +23,10 @@
namespace {
const int kInvalidTransopDataIndex = -1;
const int kTransOpOutIndex = 0;
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {{ge::DT_FLOAT, ge::DT_BOOL}};
std::map<ge::DataType, ge::DataType> precision_loss_transfer_map = {
{ge::DT_FLOAT, ge::DT_BOOL},
{ge::DT_INT64, ge::DT_BOOL}
};
} // namespace

namespace ge {


+ 69
- 12
ge/graph/load/graph_loader.cc View File

@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
}

GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
}

ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@@ -144,6 +144,63 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}

Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
Status ret;
ModelData model_data;
ret = LoadDataFromFile(path, key_path, priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}

ret = LoadModel(model_data, listener, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
}

if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}

return ret;
}

Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id) {
GELOGI("Load model begin, model_id:%u.", model_id);

// For GeOp, Open Device 0 here.
GE_CHK_RT_RET(rtSetDevice(0));
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
if (ret != SUCCESS) {
GE_CHK_RT(rtDeviceReset(0));
GELOGE(ret, "LoadModel: Load failed.");
return ret;
}
ret = model_manager->Start(model_id);
if (ret != SUCCESS) {
if (model_manager->Unload(model_id) != SUCCESS) {
GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
}
GELOGE(ret, "LoadModel: Start failed.");
return ret;
}
GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
return SUCCESS;
}

Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@@ -168,16 +225,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
}

Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t mem_size, void *weight_ptr, size_t weight_size) {
size_t memsize, void *weight_ptr, size_t weightsize) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
}
GELOGI("Load model success, model_id:%u.", model_id);
return SUCCESS;
@@ -202,8 +259,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
return ret;
}

GELOGI("Load model with queue success, model_id:%u.", model_id);
@@ -263,10 +320,10 @@ Status GraphLoader::GetMemoryInfo(int64_t &free) {
return SUCCESS;
}

Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
Status GraphLoader::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id);
Status ret = model_manager->DestroyAicpuKernel(session_id, model_id, sub_model_id);
if (ret != SUCCESS) {
GELOGE(ret, "Destroy aicpu kernel failed.");
return ret;


+ 7
- 1
ge/graph/load/graph_loader.h View File

@@ -44,6 +44,12 @@ class GraphLoader {

static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);

static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);

static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);

static Status CommandHandle(const Command &command);

static Status GetMemoryInfo(int64_t &free);
@@ -62,7 +68,7 @@ class GraphLoader {
const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
std::vector<GeTensorDesc> &output_desc);

static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id);

static Status DestroyAicpuSessionForInfer(uint32_t model_id);



+ 1
- 6
ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -319,9 +319,6 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size filed");
@@ -479,9 +476,6 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);
@@ -897,6 +891,7 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
toolkit::dumpdata::DumpData dump_data;
dump_data.set_version("2.0");
dump_data.set_dump_time(GetNowTime());
dump_data.set_op_name(op_desc_info.op_name);
for (size_t i = 0; i < op_desc_info.input_format.size(); ++i) {
toolkit::dumpdata::OpInput input;
input.set_data_type(toolkit::dumpdata::OutputDataType(GetIrDataType(op_desc_info.input_data_type[i])));


+ 151
- 144
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh
if (weight_ptr == nullptr) {
weights_mem_base_ = MallocWeightsMem(weights_size);
if (weights_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
}
is_inner_weight_base_ = true;
}
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh

Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (is_feature_map_mem_has_inited_) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once .");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "call InitFeatureMapMem more than once .");
return FAILED;
}
is_feature_map_mem_has_inited_ = true;

@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;

if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
return FAILED;
}

mem_base_ = static_cast<uint8_t *>(dev_ptr);
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (TotalMemSize() && mem_base_ == nullptr) {
mem_base_ = MallocFeatureMapMem(data_size);
if (mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
}
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]",
runtime_param_.graph_id, mem_base_, data_size);
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
if (p2p_data_size != 0) {
p2p_mem_base_ = MallocP2PMem(p2p_data_size);
if (p2p_mem_base_ == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
return GE_EXEC_ALLOC_P2P_MEM_FAILED;
}
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
p2p_mem_base_, p2p_data_size);
@@ -485,6 +485,8 @@ Status DavinciModel::DoTaskSink() {

GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");

GE_CHK_STATUS_RET(ModelManager::GetInstance()->CheckAicpuOpList(ge_model_), "Check aicpu op type failed.");

GE_CHK_STATUS_RET(InitEntryTask(), "InitEntryTask failed.");

GE_CHK_STATUS_RET(DistributeTask(), "Distribute failed.");
@@ -537,7 +539,7 @@ Status DavinciModel::OpDebugRegister() {

uint32_t op_debug_mode = 0;
(void)ge::AttrUtils::GetInt(ge_model_, ATTR_OP_DEBUG_MODE, op_debug_mode);
GELOGD("The value of op_debug_mode in ge_model_ is %u.", op_debug_mode);
GELOGD("The value of op debug mode in ge_model is %u.", op_debug_mode);
uint32_t debug_task_id = 0;
uint32_t debug_stream_id = 0;
rt_ret = rtDebugRegister(rt_model_handle_, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
@@ -605,7 +607,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
version_ = ge_model_->GetVersion();
name_ = ge_model_->GetName();
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_);
GELOGD("The value of ge.l1Fusion in ge_model_ is %d.", is_l1_fusion_enable_);
GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_);
CheckHasHcomOp();

vector<int64_t> huge_stream_list;
@@ -710,7 +712,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

// collect profiling for ge
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed");
auto &profiling_manager = ProfilingManager::Instance();
if (profiling_manager.ProfilingModelLoadOn()) {
Status p_ret = ReportProfilingData();
@@ -733,7 +734,6 @@ Status DavinciModel::ReportProfilingData() {
}
ProfilingManager::Instance().ReportProfilingData(model_id_, GetTaskDescInfo(), compute_graph_desc_info);
GE_CHK_STATUS(SinkModelProfile(), "Sink model profiler failed.");
op_list_.clear();

return SUCCESS;
}
@@ -2087,61 +2087,12 @@ Status DavinciModel::SyncVarData() {
return ret;
}

Status DavinciModel::InitModelProfile() {
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size());
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID()));
}

std::set<uint32_t> task_id_set;
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const auto &task : task_list_) {
GE_CHECK_NOTNULL(task);
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo();
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) {
continue;
}

if (task_id_set.count(task->GetTaskID()) > 0) {
continue;
}

const auto &op_desc = GetOpByIndex(fusion_op_info->op_index);
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index);

ProfileInfo profile;
profile.fusion_info = *fusion_op_info;
Range range = op_id_map_.equal_range(fusion_op_info->op_index);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
profile.task_count++;
task_id_set.insert(range_idx->second);
}

// memory info
TaskMemInfo &mem_info = profile.memory_info;
const auto input_size = ModelUtils::GetInputSize(op_desc);
const auto output_size = ModelUtils::GetOutputSize(op_desc);
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc);
const auto weight_size = ModelUtils::GetWeightSize(op_desc);
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0);
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0);
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0);
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0);
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size;

profile_list_.emplace_back(profile);
inline int64_t SumSize(const vector<int64_t> &size_list) {
int64_t sum_size = 0;
for (const int64_t &size : size_list) {
sum_size += size;
}

GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size());
return SUCCESS;
return sum_size;
}

Status DavinciModel::SinkModelProfile() {
@@ -2149,12 +2100,18 @@ Status DavinciModel::SinkModelProfile() {
auto &prof_mgr = ProfilingManager::Instance();
ReporterData reporter_data{};
// report model data tag name
std::string tag_name("model_load_info_" + std::to_string(this->Id()));
std::string tag_name;
tag_name.append("model_load_info_").append(std::to_string(this->Id()));
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK,
return FAILED, "Sink model tag memcpy error.");

// Model Header
std::string name = om_name_.empty() ? name_ : om_name_;
string name;
if (!om_name_.empty()) {
name = om_name_;
} else {
name = name_;
}
size_t name_len = name.size();
reporter_data.deviceId = device_id_;
reporter_data.data = (unsigned char *)&name_len;
@@ -2186,71 +2143,128 @@ Status DavinciModel::SinkModelProfile() {
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

int32_t task_num = task_list_.size();
std::multimap<uint32_t, uint32_t> op_id_map;
std::set<uint32_t> task_id_set;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
// when type is RT_MODEL_TASK_KERNEL, ctx is not null
if (fusion_op_info != nullptr) {
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_id = task->GetTaskID();
if (op_num > 0) {
GELOGI("task.id = %u, opNum = %u", task_id, op_num);
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
}
}
}

struct memoryInfo {
int64_t input_size;
int64_t output_size;
int64_t weight_size;
int64_t workspace_size;
int64_t total_size;

memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {}
};

using CIT = std::multimap<uint32_t, uint32_t>::const_iterator;
using Range = std::pair<CIT, CIT>;
for (const ProfileInfo &profile : profile_list_) {
// op name after fusion
string fusion_op_name = profile.fusion_info.op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
uint32_t op_num = profile.fusion_info.original_op_names.size();
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = profile.fusion_info.original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
for (int32_t i = 0; i < task_num; i++) {
auto task = task_list_[i];
GE_CHECK_NOTNULL(task);
auto fusion_op_info = task->GetFusionOpInfo();
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) {
uint32_t task_id = task->GetTaskID();
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_count = 0;
if (task_id_set.count(task_id) != 0) {
continue;
}

uint32_t op_id = fusion_op_info->op_index;
Range range = op_id_map.equal_range(op_id);
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) {
task_count++;
uint32_t task_id = range_idx->second;
task_id_set.insert(task_id);
}

// op name after fusion
string fusion_op_name = fusion_op_info->op_name;
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size();
reporter_data.data = (unsigned char *)&fusion_op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;

reporter_data.data = (unsigned char *)fusion_op_name.c_str();
reporter_data.dataLen = fusion_op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// original op name before fusion
reporter_data.data = (unsigned char *)&op_num;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

for (uint32_t k = 0; k < op_num; k++) {
std::string op_name = fusion_op_info->original_op_names[k];
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size();
reporter_data.data = (unsigned char *)&op_name_len;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
reporter_data.data = (unsigned char *)op_name.c_str();
reporter_data.dataLen = op_name_len;
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = task->GetStreamId();
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
struct memoryInfo memory_info;
uint32_t op_index = fusion_op_info->op_index;
auto iter = op_list_.find(op_index);
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index);
auto op_desc = iter->second;
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc));
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc));
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc));
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc));
memory_info.total_size =
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size;
reporter_data.data = (unsigned char *)&memory_info;
reporter_data.dataLen = sizeof(struct memoryInfo);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}

// stream id info
uint32_t streamId = profile.fusion_info.stream_id;
reporter_data.data = (unsigned char *)&streamId;
reporter_data.dataLen = sizeof(int32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// memory info
reporter_data.data = (unsigned char *)&profile.memory_info;
reporter_data.dataLen = sizeof(profile.memory_info);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

// task info
reporter_data.data = (unsigned char *)&profile.task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
// task info
reporter_data.data = (unsigned char *)&task_count;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());

Range task_range = op_id_map.equal_range(op_id);
for (CIT idx = task_range.first; idx != task_range.second; ++idx) {
uint32_t task_id = idx->second;
reporter_data.data = (unsigned char *)&task_id;
reporter_data.dataLen = sizeof(uint32_t);
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED,
"Reporter data fail, model id:%u.", this->Id());
}
}
}

return SUCCESS;
}

@@ -2824,19 +2838,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
return SUCCESS;
}

Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) {
for (size_t i = 0; i < total_io_addrs.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]);
Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_input_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]);
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i],
knonw_output_data_info_.at(total_io_addrs[i]));
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]);
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
}
}
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success.");
@@ -2865,7 +2879,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed.");
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

if (total_args_size_ == 0) {
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_);
@@ -2932,14 +2946,7 @@ Status DavinciModel::MallocKnownArgs() {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}

// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");
@@ -2993,7 +3000,9 @@ Status DavinciModel::DistributeTask() {
}

auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX);
bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL)
&& (task_type != RT_MODEL_TASK_KERNEL_EX)
&& (task_type != RT_MODEL_TASK_HCCL);
GE_IF_BOOL_EXEC(no_need_profiling, continue);

SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
@@ -3008,8 +3017,6 @@ Status DavinciModel::DistributeTask() {
task_desc_info.block_dim = task_def.kernel().block_dim();
task_desc_info.task_id = task->GetTaskID();
task_desc_info.stream_id = task->GetStreamId();
task_desc_info.shape_type = "static";
task_desc_info.cur_iter_num = 0;
task_desc_info_.emplace_back(task_desc_info);
if (flag) {
if (task->GetSktTaskID() != 0xFFFFFFFF) {


+ 22
- 33
ge/graph/load/new_model_manager/davinci_model.h View File

@@ -76,20 +76,6 @@ struct timeInfo {
int64_t dumpEndTime;
};

struct TaskMemInfo {
int64_t input_size{0};
int64_t output_size{0};
int64_t weight_size{0};
int64_t workspace_size{0};
int64_t total_size{0};
};

struct ProfileInfo {
FusionOpInfo fusion_info;
TaskMemInfo memory_info;
uint32_t task_count{0};
};

enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@@ -150,6 +136,20 @@ class DavinciModel {
///
void SetId(uint32_t model_id) { model_id_ = model_id; }

///
/// @ingroup ge
/// @brief Get SubModelId
/// @return sub model ID
///
uint32_t SubModelId() const { return sub_model_id_; }

///
/// @ingroup ge
/// @brief Set SubModelId
/// @return sub model ID
///
void SetSubModelId(uint32_t sub_model_id) { sub_model_id_ = sub_model_id; }

static void *Run(DavinciModel *model_pointer);

///
@@ -240,6 +240,8 @@ class DavinciModel {
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }

OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@@ -448,6 +450,10 @@ class DavinciModel {

int64_t GetLoadEndTime() { return load_end_time_; }

Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status ReportProfilingData();

void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@@ -484,14 +490,6 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@@ -510,7 +508,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
Status UpdateKnownZeroCopyAddr();
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@@ -812,11 +810,6 @@ class DavinciModel {

void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

Status InitModelProfile();
Status SinkModelProfile();

Status SinkTimeProfile(const InputData &current_data);

Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);

@@ -836,6 +829,7 @@ class DavinciModel {

uint32_t model_id_;
uint32_t runtime_model_id_;
uint32_t sub_model_id_ = 0;
string name_;

// used for inference data dump
@@ -952,8 +946,6 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
@@ -993,9 +985,6 @@ class DavinciModel {
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;

std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

+ 267
- 54
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -18,6 +18,8 @@

#include <string>

#include "mmpa/mmpa_api.h"
#include "aicpu/aicpu_schedule/aicpu_op_type_list.h"
#include "common/dump/dump_manager.h"
#include "common/l2_cache_optimize.h"
#include "common/profiling/profiling_manager.h"
@@ -30,6 +32,7 @@
#include "graph/load/new_model_manager/davinci_model_parser.h"
#include "model/ge_root_model.h"
#include "graph/common/local_context.h"
#include "graph/utils/attr_utils.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "hybrid/hybrid_davinci_model.h"

@@ -51,7 +54,7 @@ const char *const kBatchLoadBuf = "batchLoadsoFrombuf";
const char *const kDeleteCustOp = "deleteCustOp";
const int kTimeSpecNano = 1000000000;
const int kTimeSpecMiro = 1000000;
const int kSessionMaxBias = 100;
const int kOpNameMaxSize = 100;
struct CustAicpuSoBuf {
uint64_t kernelSoBuf;
uint32_t kernelSoBufLen;
@@ -78,7 +81,8 @@ ModelManager::ModelManager() {
session_id_bias_ = 0;
}

Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) {
Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id,
uint32_t sub_model_id) {
STR_FWK_OP_KERNEL param_base = {};
void *devicebase = nullptr;
void *aicpu_kernel_addr = nullptr;
@@ -88,11 +92,11 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
param_base.fwkKernelBase.fwk_kernel.sessionID = session_id;
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
GELOGD("kernel destroy session_id %lu, model_id %u, sub_model_id %u..", session_id, model_id, sub_model_id);
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
// Insert size of aicpu kernel vector in the first element
v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size());
@@ -177,7 +181,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}

void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@@ -190,7 +194,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
GE_CHK_RT(rtSetDevice(static_cast<int32_t>(GetContext().DeviceId())));
}

Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0, 0);
if (ret != SUCCESS) {
GELOGW("The session: %lu destroy failed.", session_id);
} else {
@@ -206,7 +210,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@@ -224,12 +228,14 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
return SUCCESS;
}

ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id,
sub_model_id);
if (ret != SUCCESS) {
GELOGE(FAILED, "Destroy aicpu kernel failed.");
return FAILED;
@@ -238,10 +244,12 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
return SUCCESS;
}

ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id,
uint64_t kernel_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id) + "_" +
std::to_string(sub_model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
v_aicpu_kernel = model_aicpu_kernel_.at(model_key);
}
@@ -251,7 +259,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}

ModelManager::~ModelManager() {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@@ -359,24 +367,25 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
if (it != model_map_.end()) {
uint64_t session_id = it->second->GetSessionId();
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(id) + "_" +
std::to_string(it->second->SubModelId());
auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key);
if (iter_aicpu_kernel != model_aicpu_kernel_.end()) {
(void)model_aicpu_kernel_.erase(iter_aicpu_kernel);
@@ -385,22 +394,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
}

return SUCCESS;
}

std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}

std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(map_mutex_);

auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@@ -971,9 +980,8 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}

Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@@ -986,8 +994,7 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
GE_CHECK_NOTNULL(davinci_model);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}
@@ -997,8 +1004,9 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);

return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}

@@ -1013,18 +1021,28 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAIPPInfo(index, aipp_info);
}

Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);

return davinci_model->GetAippType(index, type, aipp_index);
}

Status ModelManager::GenSessionId(uint64_t &session_id) {
const uint64_t kSessionTimeMask = 0xffffffffffff0000;
const uint64_t kSessionPidMask = 0x000000000000ff00;
const uint64_t kSessionBiasMask = 0x00000000000000ff;

const uint64_t kMaskPerOffset = 8;

std::lock_guard<std::mutex> lock(session_id_create_mutex_);

mmTimeval tv;
@@ -1032,12 +1050,14 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
session_id = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us
uint64_t timestamp = static_cast<uint64_t>(tv.tv_sec * kTimeSpecMiro + tv.tv_usec); // 1000000us

static uint32_t pid = mmGetPid();

session_id_bias_++;
// max bais 100.
session_id_bias_ = session_id_bias_ % kSessionMaxBias;
session_id = session_id * kSessionMaxBias + session_id_bias_;
session_id = ((timestamp<<kMaskPerOffset<<kMaskPerOffset) & kSessionTimeMask) +
((pid<<kMaskPerOffset) & kSessionPidMask) + (session_id_bias_ & kSessionBiasMask);

GELOGD("Generate new session id: %lu.", session_id);
return SUCCESS;
@@ -1053,15 +1073,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();

ModelHelper model_helper;
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
Status ret = model_helper.LoadModel(model);
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@@ -1075,8 +1087,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@@ -1088,7 +1100,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@@ -1220,14 +1232,15 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check whether model has been loaded or not.", model_id);
"Invalid model id %u, check weather model has been loaded or not.", model_id);

if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
// Zero copy is enabled by default, no need to judge.
uint64_t session_id_davinci = davinci_model->GetSessionId();
uint32_t model_id_davinci = davinci_model->GetModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci);
uint32_t sub_model_id = davinci_model->SubModelId();
Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci, sub_model_id);
if (status != SUCCESS) {
GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci,
model_id_davinci);
@@ -1243,11 +1256,11 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}

Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0);
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0, 0);
if (ret == SUCCESS) {
(void)sess_ids_.insert(session_id);
GELOGI("The session: %lu create success.", session_id);
@@ -1462,7 +1475,8 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}
std::lock_guard<std::recursive_mutex> lock(map_mutex_);

std::lock_guard<std::mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

@@ -1534,4 +1548,203 @@ Status ModelManager::EnableExceptionDump(const std::map<string, string> &options
return SUCCESS;
}

// Launches the device-side "checkOpType" aicpu kernel to verify that every op
// type in aicpu_optype_list (CPU_KERNEL) and aicpu_tf_optype_list (TF_KERNEL)
// is supported on the current device.
//
// Flow: stage a SysOpCheckInfo request (op names copied into device HBM),
// launch the kernel on a temporary stream, synchronize, then read back the
// SysOpCheckResp and per-op ReturnCodes.
//
// Returns SUCCESS when nothing needs checking or all op types pass,
// FAILED when the device reports unsupported op types (details logged),
// or a converted runtime status when any rt* call fails.
//
// All HBM buffers recorded in allocated_mem are freed by the scope guard on
// every exit path; the stream is destroyed explicitly on each path after its
// creation.
Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
                                              std::vector<std::string> &aicpu_tf_optype_list) {
  std::string kernel_name = "checkOpType";
  GELOGI("LaunchKernelCheckAicpuOpType in, kernel name %s", kernel_name.c_str());
  // Serializes all cust-aicpu kernel launches.
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  std::vector<SysOpInfo> req_aicpu_op_info_list;
  std::vector<SysOpInfo> res_aicpu_op_info_list;
  std::vector<ReturnCode> res_ret_code_list;

  // Nothing to validate: treat as success.
  if (aicpu_optype_list.empty() && aicpu_tf_optype_list.empty()) {
    GELOGI("No need to check aicpu op type.");
    return SUCCESS;
  }

  vector<void *> allocated_mem;
  rtError_t status;
  rtStream_t stream = nullptr;
  void *args = nullptr;

  // Device-side buffers: request op list, response op list, per-op return codes.
  void *d_req_op_list = nullptr;
  void *d_res_op_list = nullptr;
  void *d_ret_code_list = nullptr;

  size_t aicpu_op_nums = aicpu_optype_list.size();
  size_t tf_op_nums = aicpu_tf_optype_list.size();
  size_t op_nums = aicpu_op_nums + tf_op_nums;
  // Scope guard: frees every buffer recorded in allocated_mem on any return path.
  std::function<void()> callback = [&]() {
    for (auto mem : allocated_mem) {
      GE_CHK_RT(rtFree(mem));
    }
  };
  GE_MAKE_GUARD(release, callback);
  // malloc sysOpInfoList in SysOpCheckInfo
  status = rtMalloc(&d_req_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_req_op_list);

  // malloc sysOpInfoList in SysOpCheckResp
  status = rtMalloc(&d_res_op_list, op_nums * sizeof(SysOpInfo), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_res_op_list);

  // malloc returnCodeList in SysOpCheckResp
  status = rtMalloc(&d_ret_code_list, op_nums * sizeof(ReturnCode), RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(d_ret_code_list);

  // Stage CPU_KERNEL op types: each name is copied to device memory and
  // referenced from a SysOpInfo entry by device address.
  for (const auto &op_type : aicpu_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.length(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.length(), op_type.c_str(), op_type.length(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.length();
    op_info.kernelsType = CPU_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }

  // Stage TF_KERNEL op types the same way.
  for (const auto &op_type : aicpu_tf_optype_list) {
    SysOpInfo op_info;
    // malloc op_type name in SysOpInfo
    void *d_op_type_name = nullptr;
    status = rtMalloc(&d_op_type_name, op_type.size(), RT_MEMORY_HBM);
    if (status != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
      return RT_ERROR_TO_GE_STATUS(status);
    }
    allocated_mem.push_back(d_op_type_name);
    GE_CHK_RT(rtMemcpy(d_op_type_name, op_type.size(), op_type.c_str(), op_type.size(), RT_MEMCPY_HOST_TO_DEVICE));
    op_info.opType = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_op_type_name));
    op_info.opLen = op_type.size();
    op_info.kernelsType = TF_KERNEL;
    req_aicpu_op_info_list.emplace_back(op_info);
  }
  GELOGI("Check aicpu op all attr size: %zu, real attr size: %zu.", op_nums, req_aicpu_op_info_list.size());
  GE_CHK_RT(rtMemcpy(d_req_op_list, sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), req_aicpu_op_info_list.data(),
                     sizeof(SysOpInfo) * req_aicpu_op_info_list.size(), RT_MEMCPY_HOST_TO_DEVICE));

  // Build request/response headers; sysOpInfoList fields carry device addresses
  // encoded as uint64.
  SysOpCheckInfo op_check_info_req = { 0 };
  SysOpCheckResp op_check_info_res = { 0 };
  op_check_info_req.opListNum = op_nums;
  op_check_info_req.offSetLen = sizeof(SysOpCheckInfo);
  op_check_info_req.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_req_op_list));

  op_check_info_res.opListNum = 0;
  op_check_info_res.isWithoutJson = 0;
  op_check_info_res.returnCodeList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_ret_code_list));
  op_check_info_res.sysOpInfoList = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(d_res_op_list));

  // Kernel args layout: [SysOpCheckInfo][SysOpCheckResp], both copied to device.
  uint32_t args_size = sizeof(SysOpCheckInfo) + sizeof(SysOpCheckResp);
  status = rtMalloc(&args, args_size, RT_MEMORY_HBM);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status);
    return RT_ERROR_TO_GE_STATUS(status);
  }
  allocated_mem.push_back(args);
  GE_CHK_RT(rtMemcpy(args, sizeof(SysOpCheckInfo), reinterpret_cast<void *>(&op_check_info_req), sizeof(SysOpCheckInfo),
                     RT_MEMCPY_HOST_TO_DEVICE));
  // The response block lives at args + offSetLen.
  GE_CHK_RT(rtMemcpy(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)),
                     sizeof(SysOpCheckResp), reinterpret_cast<void *>(&op_check_info_res), sizeof(SysOpCheckResp), RT_MEMCPY_HOST_TO_DEVICE));
  GE_CHK_RT(rtStreamCreate(&stream, 0));
  GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, args, args_size, nullptr, stream));

  // Block until the check kernel finishes.
  status = rtStreamSynchronize(stream);
  if (status != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status);
    GE_CHK_RT(rtStreamDestroy(stream));
    return RT_ERROR_TO_GE_STATUS(status);
  }

  // Check the response
  SysOpCheckResp *d_op_check_info_res =
      reinterpret_cast<SysOpCheckResp *>(reinterpret_cast<void *>(static_cast<uintptr_t>(static_cast<uint64_t>(
          reinterpret_cast<uintptr_t>(args)) + op_check_info_req.offSetLen)));
  (void)memset_s(&op_check_info_res, sizeof(SysOpCheckResp), 0, sizeof(SysOpCheckResp));
  GE_CHK_RT(rtMemcpy(&op_check_info_res, sizeof(SysOpCheckResp), d_op_check_info_res, sizeof(SysOpCheckResp),
                     RT_MEMCPY_DEVICE_TO_HOST));

  // Device signals "no json" when the check cannot/need not be performed.
  if (op_check_info_res.isWithoutJson) {
    GELOGI("No need to check aicpu in this scenoria.");
    GE_CHK_RT(rtStreamDestroy(stream));
    return SUCCESS;
  }
  // opListNum in the response counts UNSUPPORTED op types; zero means all pass.
  uint64_t res_op_nums = op_check_info_res.opListNum;
  GELOGI("Check aicpu type, is without json: %d, res op num: %lu.", op_check_info_res.isWithoutJson, res_op_nums);
  if (res_op_nums != 0) {
    res_ret_code_list.clear();
    res_ret_code_list.resize(res_op_nums);
    res_aicpu_op_info_list.clear();
    res_aicpu_op_info_list.resize(res_op_nums);
    GE_CHK_RT(rtMemcpy(res_ret_code_list.data(), sizeof(ReturnCode) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.returnCodeList)),
                       sizeof(ReturnCode) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    GE_CHK_RT(rtMemcpy(res_aicpu_op_info_list.data(), sizeof(SysOpInfo) * res_op_nums,
                       reinterpret_cast<void *>(static_cast<uintptr_t>(op_check_info_res.sysOpInfoList)),
                       sizeof(SysOpInfo) * res_op_nums, RT_MEMCPY_DEVICE_TO_HOST));
    if (res_ret_code_list.size() != res_aicpu_op_info_list.size() || res_ret_code_list.size() != res_op_nums) {
      GELOGE(FAILED, "Number of retcode is not equal to number of op type.");
      GE_CHK_RT(rtStreamDestroy(stream));
      return FAILED;
    }
    // Build a human-readable failure report from the unsupported op entries.
    std::string fail_reason;
    // NOTE(review): loop index is uint32_t while res_op_nums is uint64_t —
    // confirm the device can never report more than UINT32_MAX entries.
    for (uint32_t i = 0; i < res_op_nums; i++) {
      ReturnCode ret_code = res_ret_code_list.at(i);
      SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i);
      GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType,
             aicpu_info.kernelsType, aicpu_info.opLen, ret_code);
      std::vector<char> op_name;
      op_name.clear();
      op_name.resize(kOpNameMaxSize);
      // NOTE(review): destination buffer is kOpNameMaxSize (100) bytes but both
      // the capacity argument and the copy length are aicpu_info.opLen taken
      // from the device response — confirm opLen can never exceed
      // kOpNameMaxSize, otherwise this copy can overflow op_name.
      GE_CHK_RT(rtMemcpy(op_name.data(), aicpu_info.opLen, reinterpret_cast<void *>(aicpu_info.opType),
                         aicpu_info.opLen, RT_MEMCPY_DEVICE_TO_HOST));
      std::string kernel_type =
          (static_cast<OpKernelType>(aicpu_info.kernelsType) == TF_KERNEL) ? "TF_KERNEL" : "CPU_KERNEL";
      string op_name_str(op_name.data());
      fail_reason += "op_type: " + op_name_str + " kernel_type: " + kernel_type +
                     " ret code:" + std::to_string(static_cast<int>(ret_code)) +
                     "<0: op_type, 1: format, 2: datatype> \n";
    }
    fail_reason += "not support.";
    GELOGE(FAILED, "Check aicpu op_type failed. details: %s", fail_reason.c_str());
    GE_CHK_RT(rtStreamDestroy(stream));
    return FAILED;
  }

  GE_CHK_RT(rtStreamDestroy(stream));
  GELOGI("Cpu kernel launch check optype task success.");
  return SUCCESS;
}

// Validates the aicpu op types recorded on the model, if any.
// The "needCheckCpu" / "needCheckTf" list attributes carry the CPU_KERNEL and
// TF_KERNEL op-type names to verify; when neither attribute is present the
// model requires no check and SUCCESS is returned immediately.
Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
  std::vector<std::string> cpu_op_types;
  std::vector<std::string> tf_op_types;
  const bool has_cpu_list = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", cpu_op_types);
  const bool has_tf_list = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", tf_op_types);
  if (has_cpu_list || has_tf_list) {
    // Launch the device-side check kernel; the macro logs and returns on failure.
    GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(cpu_op_types, tf_op_types),
                      "Launch check aicpu op type failed.");
    return SUCCESS;
  }
  GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
  return SUCCESS;
}

} // namespace ge

+ 11
- 4
ge/graph/load/new_model_manager/model_manager.h View File

@@ -273,7 +273,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

std::shared_ptr<hybrid::HybridDavinciModel> GetHybridModel(uint32_t id);

ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id);
ge::Status KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id,
uint32_t sub_model_id);

ge::Status CreateAicpuSession(uint64_t session_id);

@@ -281,9 +282,9 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

void DestroyAicpuSession(uint64_t session_id);

ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
ge::Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id);

ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id);
ge::Status CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint32_t sub_model_id, uint64_t kernel_id);

ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

@@ -295,6 +296,11 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

ge::Status LaunchKernelCustAicpuSo(const string &kernel_name);

ge::Status LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_optype_list,
std::vector<std::string> &aicpu_tf_optype_list);

ge::Status CheckAicpuOpList(GeModelPtr ge_model);

ge::Status GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info);

ge::Status GenSessionId(uint64_t &session_id);
@@ -353,7 +359,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
uint32_t max_model_id_;
std::recursive_mutex map_mutex_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::mutex session_id_create_mutex_;
static::std::mutex exeception_infos_mutex_;
uint64_t session_id_bias_;


+ 16
- 9
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -97,14 +97,16 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin

// 2.2 Collect aicpu kernel
uint64_t kernel_id = fwk_op_kernel.fwkKernelBase.fwk_kernel.kernelID;
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(), kernel_id) != SUCCESS,
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuKernel(session_id, davinci_model->Id(),
davinci_model->SubModelId(), kernel_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuKernel error.");
return FAILED;)
// 2.3 Create session
GE_CHECK_NOTNULL(ModelManager::GetInstance());
GE_IF_BOOL_EXEC(ModelManager::GetInstance()->CreateAicpuSession(session_id) != SUCCESS,
GELOGE(FAILED, "CreateAicpuSession error. session id: %lu", session_id);
return FAILED;)
ret = ModelManager::GetInstance()->CreateAicpuSession(session_id);
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(ret, "CreateAicpuSession error. session id: %lu", session_id);
return ret;)

kernel_buf_size_ = sizeof(STR_FWK_OP_KERNEL);
if (davinci_model_->IsKnownNode()) {
@@ -132,6 +134,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy error, ret: Ox%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

InitDumpTask(input_output_addr, op_desc);
GELOGI("KernelExTaskInfo knonw node Init Success.");
return SUCCESS;
}
@@ -166,11 +169,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = input_output_addr_;
}
InitDumpTask(input_output_addr_, op_desc);
if (davinci_model_->GetOpDugReg()) {
GELOGI("Op debug is open in kernel ex task info");
dump_args_ = input_output_addr_;
@@ -200,6 +199,14 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return SUCCESS;
}

// Enables data dump for this task when the dump properties select its layer.
// @param addr     address block whose contents should be dumped; the known-node
//                 init path passes a buffer different from input_output_addr_.
// @param op_desc  op descriptor used to match the layer-name dump filter.
void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
  if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
                                                          op_desc->GetName())) {
    dump_flag_ = RT_KERNEL_DUMPFLAG;
    // Fix: record the caller-supplied address. The previous code ignored the
    // `addr` parameter and always captured the member input_output_addr_,
    // which dumps the wrong buffer on the known-node path.
    dump_args_ = addr;
  }
}

Status KernelExTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
auto kernel_ex_def = task_def.kernel_ex();
uint32_t op_index = kernel_ex_def.op_index();


+ 2
- 0
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.h View File

@@ -60,6 +60,8 @@ class KernelExTaskInfo : public TaskInfo {
private:
Status CopyTaskInfo(const domi::KernelExDef &kernel_def, const RuntimeParam &rts_param, const OpDescPtr &op_desc);

void InitDumpTask(void *addr, const OpDescPtr &op_desc);

uint32_t task_id_;
uint32_t stream_id_;
uint32_t dump_flag_;


+ 90
- 77
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -90,18 +90,20 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
fusion_op_info_.op_name = op_desc_->GetName());

string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ == ccKernelType::TE) {
// get bin_file_key
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_ERROR_TO_GE_STATUS(rt_ret););
@@ -370,11 +372,7 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@@ -430,31 +428,36 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);

vector<void *> io_addrs;
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
}

davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@@ -530,18 +533,33 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
const domi::KernelDef &kernel_def = task_def.kernel();
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);

// get opcontext stored in model
const domi::KernelContext &context = kernel_def.context();
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
}
return SUCCESS;
}
@@ -553,6 +571,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
GE_CHECK_NOTNULL(op_desc);
if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
InitDumpTask(offset);
return SUCCESS;
}

@@ -617,15 +637,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
return FAILED;
}
skt_dump_args_ = static_cast<char *>(args_) + offset;
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (IsL1FusionOp(op_desc)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else {
dump_flag_ = RT_KERNEL_DUMPFLAG;
}
dump_args_ = static_cast<char *>(args_) + offset;
}
InitDumpTask(offset);

GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset,
"Op debug is open in TVM task info");
@@ -870,7 +882,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

// copy args to new host memory
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@@ -878,23 +890,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();

vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -911,6 +908,19 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}

auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
@@ -925,16 +935,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (IsL1FusionOp(op_desc)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else {
dump_flag_ = RT_KERNEL_DUMPFLAG;
}
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
}
InitDumpTask(sizeof(aicpu::AicpuParamHead));
if (davinci_model_->GetOpDugReg()) {
GELOGI("Op debug is open in aicpu task info");
dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
@@ -948,6 +949,18 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return SUCCESS;
}

// Configure data dump for this kernel task when the dump configuration selects
// this layer: choose the fusion or plain kernel dump flag and point dump_args_
// at the argument buffer plus the given byte offset.
// @param offset  byte offset into args_ where the dump-relevant data begins.
void KernelTaskInfo::InitDumpTask(uint32_t offset) {
  const auto &dump_properties = davinci_model_->GetDumpProperties();
  // Nothing to do unless this op is in the dump layer list.
  if (!dump_properties.IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(), op_desc_->GetName())) {
    return;
  }
  // L1-fusion ops require the fusion-specific dump flag.
  dump_flag_ = IsL1FusionOp(op_desc_) ? RT_FUSION_KERNEL_DUMPFLAG : RT_KERNEL_DUMPFLAG;
  dump_args_ = static_cast<char *>(args_) + offset;
}

Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
if (ext_info.empty()) {
return SUCCESS;


+ 3
- 3
ge/graph/load/new_model_manager/task_info/kernel_task_info.h View File

@@ -129,7 +129,9 @@ class KernelTaskInfo : public TaskInfo {
Status SuperKernelDistribute();
bool IsL1FusionOp(const OpDescPtr &op_desc);

// For super kernel
void InitDumpTask(uint32_t offset);

// For super kernel
Status SaveSKTDumpInfo();
void UpdateTaskId();
void UpdateSKTTaskId();
@@ -159,9 +161,7 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;

// aicpu ext_info device mem


+ 30
- 2
ge/graph/manager/graph_manager.cc View File

@@ -23,15 +23,25 @@
#include <sstream>
#include <string>
#include <thread>
#include <utility>

#include "common/ge/ge_util.h"
#include "common/math/math_util.h"
#include "common/thread_pool.h"
#include "common/util.h"
#include "external/graph/types.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/ge_types.h"
#include "analyzer/analyzer.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/ge_global_options.h"
#include "graph/ge_local_context.h"
#include "graph/manager/graph_mem_allocator.h"
#include "graph/manager/util/rt_context_util.h"
#include "graph/partition/dynamic_shape_partition.h"
#include "graph/passes/enter_pass.h"
@@ -51,6 +61,8 @@
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/input_output_connection_identify_pass.h"
#include "graph/passes/iterator_op_pass.h"
@@ -65,6 +77,7 @@
#include "graph/passes/permute_pass.h"
#include "graph/passes/prune_pass.h"
#include "graph/passes/ref_identity_delete_op_pass.h"
#include "graph/passes/replace_with_empty_const_pass.h"
#include "graph/passes/remove_same_const_pass.h"
#include "graph/passes/reshape_recovery_pass.h"
#include "graph/passes/reshape_remove_pass.h"
@@ -75,12 +88,14 @@
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"
#include "graph/passes/transop_symmetry_elimination_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/useless_control_out_remove_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h"
#include "graph/passes/end_of_sequence_add_control_pass.h"
@@ -91,6 +106,9 @@
#include "graph/passes/memcpy_addr_async_pass.h"
#include "graph/build/label_allocator.h"
#include "graph/utils/tensor_adapter.h"
#include "graph/utils/type_utils.h"
#include "graph/graph_util.h"
#include "graph/types.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "ir_build/atc_ir_common.h"
@@ -518,7 +536,7 @@ Status GraphManager::CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_gr
return SUCCESS;
}

Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph,
Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_graph,
Graph2SubGraphInfoList &sub_graph_map, uint64_t session_id) {
GE_CHECK_NOTNULL(compute_graph);
// use default 16 multi thread
@@ -719,6 +737,9 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
GeRootModelPtr &ge_root_model, uint64_t session_id) {
GE_CHECK_NOTNULL(graph_node);
GE_CHECK_NOTNULL(compute_graph);

CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph);
GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts",
GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts,
@@ -2421,6 +2442,13 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
continue;
}
auto model_id = model->GetModelId();
// unknown model not release
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape));
if (is_unknown_shape) {
GELOGD("model_id[%u] graph_id[%u] is unknown model, not release memory", model_id, graph_id);
continue;
}
// not loaded,no need unload
if (!it.second->GetLoadFlag()) {
GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id);
@@ -2438,7 +2466,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
result = GraphLoader::DestroyAicpuKernel(session_id, model_id);
result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
if (result != SUCCESS) {
GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
graph_id);


+ 3
- 0
ge/graph/manager/graph_mem_allocator.cc View File

@@ -16,7 +16,10 @@

#include "graph/manager/graph_mem_allocator.h"

#include <set>
#include <string>

#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_caching_allocator.h"
#include "graph/manager/rdma_pool_allocator.h"



+ 33
- 0
ge/graph/optimize/graph_optimize.cc View File

@@ -336,4 +336,37 @@ Status GraphOptimize::IdentifyReference(ComputeGraphPtr &compute_graph) {
}
return SUCCESS;
}
// Run every registered whole-graph optimizer, in priority order, over the
// merged graph — skipping the engine excluded by the configured core type.
// @param compute_graph  graph to optimize in place; must not be null.
// @return SUCCESS, or the first optimizer failure / initialization error code.
Status GraphOptimize::OptimizeWholeGraph(ComputeGraphPtr &compute_graph) {
  if (compute_graph == nullptr) {
    GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeWholeGraph]: compute_graph is nullptr.");
    return GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL;
  }

  std::shared_ptr<GELib> gelib = ge::GELib::GetInstance();
  if (gelib == nullptr || !gelib->InitFlag()) {
    GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeWholeGraph failed.");
    return GE_CLI_GE_NOT_INITIALIZED;
  }

  auto optimizers = gelib->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
  GELOGI("optimize by opskernel in OptimizeWholeGraph. num of graph_optimizer is %zu.", optimizers.size());
  // A vector-core build excludes the AI-core engine, and vice versa.
  string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
  GELOGD("[OptimizeWholeGraph]: engine type will exclude: %s", exclude_core_type.c_str());

  Status ret = SUCCESS;
  for (auto &optimizer : optimizers) {
    if (optimizer.first == exclude_core_type || optimizer.second == nullptr) {
      continue;
    }
    GELOGI("Begin to optimize whole graph by engine %s", optimizer.first.c_str());
    ret = optimizer.second->OptimizeWholeGraph(*compute_graph);
    // Dump the intermediate graph after each engine, regardless of its result.
    GE_DUMP(compute_graph, "OptimizeWholeGraph" + optimizer.first);
    if (ret != SUCCESS) {
      GELOGE(ret, "[OptimizeWholeGraph]: graph optimize failed, ret:%u", ret);
      return ret;
    }
  }
  return ret;
}
} // namespace ge

+ 3
- 0
ge/graph/optimize/graph_optimize.h View File

@@ -52,6 +52,9 @@ class GraphOptimize {
// for fe prepare optimize in quantize scene
Status OptimizeOriginalGraphForQuantize(ComputeGraphPtr &compute_graph);

// for engine to optimize merged whole graph before ge Optimize2
Status OptimizeWholeGraph(ComputeGraphPtr &compute_graph);

// for rts optimize before build to add attr and insert memcpy op
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph);



+ 33
- 0
ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect
}
}
}
return LinkToPotentialPrecedenceNode(graph, clean_addr_node);
}

// Add control edges from atomic clean node to all potential precedence nodes which may execute before atomic clean
// node. We hope that atomic clean node can execute with the highest priority in the entire graph. Because of stream
// concurrency mechanism, only placing it at the head can not ensure that priority. Therefore, we need to add control
// edges from atomic clean node to the nodes that may be the first node on each stream. Generally, the first nodes on
// each stream are successors of Data/Variable, and Data/Variable won't generate task or execute, so we link to the
// successors of Data/Variable.
// Give the atomic clean node the highest effective scheduling priority by
// adding control edges from it to every node that could be the first task on a
// stream: the direct successors of input-less Data/Variable nodes (Data and
// Variable themselves generate no task, so their successors are the real
// stream heads).
// @param graph             graph being processed.
// @param atomic_clean_node node that must run before those stream heads.
// @return SUCCESS, or a failure code from a null check or edge insertion.
Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) {
  GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
         atomic_clean_node->GetName().c_str());
  auto clean_out_ctrl = atomic_clean_node->GetOutControlAnchor();
  GE_CHECK_NOTNULL(clean_out_ctrl);

  for (const auto &first_node : graph->GetDirectNode()) {
    GE_CHECK_NOTNULL(first_node);
    // Only input-less Data/Variable nodes mark the head of a stream here.
    const bool is_stream_head = (first_node->GetType() == DATA || first_node->GetType() == VARIABLE) &&
                                first_node->GetInAllNodes().empty();
    if (!is_stream_head) {
      continue;
    }
    for (const auto &successor : first_node->GetOutAllNodes()) {
      GE_CHECK_NOTNULL(successor);
      auto succ_in_ctrl = successor->GetInControlAnchor();
      GE_CHECK_NOTNULL(succ_in_ctrl);
      if (clean_out_ctrl->IsLinkedWith(succ_in_ctrl)) {
        continue;  // Edge already present; avoid duplicates.
      }
      GE_CHK_STATUS_RET(clean_out_ctrl->LinkTo(succ_in_ctrl));
      GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), successor->GetName().c_str());
    }
  }

  return SUCCESS;
}



+ 8
- 0
ge/graph/passes/atomic_addr_clean_pass.h View File

@@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass {
Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);

/**
* Link atomic clean node to all potential precedence nodes which may execute before atomic clean node
* @param graph
* @param atomic_clean_node
* @return
*/
Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);

/**
* Check if this node is atomic op.
* @param node
* @return


+ 4
- 8
ge/graph/passes/dynamic_single_op_reset_shape_pass.cc View File

@@ -113,16 +113,13 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) {
GE_CHECK_NOTNULL(op_desc);
std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim};
GeShape dynamic_shape(dynamic_shape_dims);
bool reset_shape_flag = false;
if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) {
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
}
(void)ResetInputTensorShape(op_desc, dynamic_shape);
(void)ResetOutputTensorShape(op_desc, dynamic_shape);
return SUCCESS;
}

Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape,
bool &reset_shape_flag) {
reset_shape_flag = false;
Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
const GeShape &dynamic_shape) {
GE_CHECK_NOTNULL(op_desc);
for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) {
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
@@ -136,7 +133,6 @@ Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc,
if (CheckIfConstInput(input_desc)) {
continue;
}
reset_shape_flag = true;
input_desc->SetShape(dynamic_shape);
}
return SUCCESS;


+ 1
- 1
ge/graph/passes/dynamic_single_op_reset_shape_pass.h View File

@@ -27,7 +27,7 @@ class DynamicSingleOpResetShapePass : public GraphPass {

private:
Status ResetOpShape(OpDescPtr &op_desc);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag);
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape);
Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu);
bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc);


+ 53
- 45
ge/graph/passes/multi_batch_clone_pass.cc View File

@@ -22,6 +22,8 @@
#include "graph/preprocess/multi_batch_options.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "register/op_registry.h"

namespace ge {
@@ -478,8 +480,28 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
return SUCCESS;
}

(void)AttrUtils::SetListInt(data->GetOpDesc(), ATTR_MBATCH_ORIGIN_INPUT_DIMS, data_shape.GetDims());

GeTensorDesc tensor(NodeUtils::GetOutputDesc(*data, kDataOutIndex));
std::vector<std::string> input_dims_str;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
auto shape = data_shape;
auto ret = multibatch::CalcShape(data_to_dynamic_info_.at(data_name).at(i), shape);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to calculate the shape for data node %s, the shape may not match", data->GetName().c_str());
return ret;
}
tensor.SetShape(shape);
int64_t tensor_size = 0;
(void)TensorUtils::GetTensorSizeInBytes(tensor, tensor_size);
string input_str = TypeUtils::FormatToSerialString(tensor.GetFormat()) + ":" +
TypeUtils::DataTypeToSerialString(tensor.GetDataType()) + ":" + data->GetName() + ":" +
std::to_string(tensor_size) + ":" + std::to_string(tensor.GetShape().GetDimNum()) + ":" +
formats::JoinToString(tensor.GetShape().GetDims());
input_dims_str.emplace_back(input_str);
}
(void)AttrUtils::SetListStr(data->GetOpDesc(), "_all_origin_gears_inputs", input_dims_str);

size_t max_shape_index = 0;
int64_t max_size = 0;
for (size_t i = 0; i < batch_shapes_.size(); ++i) {
@@ -503,12 +525,24 @@ Status MultiBatchClonePass::SetMaxShapeToData(const NodePtr &data) {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::UpdateShapeToData(const NodePtr &data, size_t index) {
Status MultiBatchClonePass::UpdateSubgraphData(const NodePtr &data, size_t index) {
int node_index = -1;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_INDEX, node_index)) {
GELOGE(FAILED, "Failed to get index from data[%s]", data->GetName().c_str());
return FAILED;
}

int parent_index = node_index + 1;
if (!AttrUtils::SetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", data->GetName().c_str());
return FAILED;
}

auto data_shape = NodeUtils::GetOutputDesc(*data, kDataOutIndex).GetShape();
const auto &dims = data_shape.GetDims();
if (std::all_of(dims.begin(), dims.end(), [](int64_t val) { return val >= 0; })) {
@@ -580,13 +614,15 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
subgraph->SetParentGraph(graph);
graph->AddSubgraph(subgraph->GetName(), subgraph);
all_branch_output_[subgraph] = subgraph->FindFirstNodeMatchType(NETOUTPUT);
GE_CHK_STATUS_RET(UpdateSubgraphOutput(all_branch_output_[subgraph]),
"Update %s failed", all_branch_output_[subgraph]->GetName().c_str());

const string key_name = "branches" + std::to_string(i);
op_desc->AddSubgraphName(key_name);
op_desc->SetSubgraphInstanceName(i, subgraph->GetName());

for (const auto &data : input_nodes) {
GE_CHK_STATUS_RET(UpdateShapeToData(data, i), "Update %s failed", subgraph->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(data, i), "Update %s failed", subgraph->GetName().c_str());
}
}

@@ -595,55 +631,27 @@ Status MultiBatchClonePass::CreateSubgraphs(const ComputeGraphPtr &graph, const
const auto &op_desc = n->GetOpDesc();
op_desc->SetName(n->GetName() + kMultiBatchNodePostfix + "0");
if (n->GetType() == DATA) {
GE_CHK_STATUS_RET(UpdateShapeToData(n, 0), "Update %s failed", branch->GetName().c_str());
GE_CHK_STATUS_RET(UpdateSubgraphData(n, 0), "Update %s failed", branch->GetName().c_str());
}
}

return PostProcSubgraph(graph);
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status MultiBatchClonePass::PostProcSubgraph(const ComputeGraphPtr &graph) {
auto func_desc = case_node_->GetOpDesc();
domi::ParseSubgraphFuncV2 parse_func_v2 = nullptr;
auto post_func = domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType());
if (post_func == nullptr) {
GELOGW("The subgraph post func for node %s type %s is null.", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
if (domi::OpRegistry::Instance()->GetParseSubgraphPostFunc(func_desc->GetType(), parse_func_v2) != SUCCESS ||
parse_func_v2 == nullptr) {
GELOGW("The subgraph new post func v2 for node %s type %s is null", case_node_->GetName().c_str(),
case_node_->GetType().c_str());
return FAILED;
}
}

for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
GELOGE(FAILED, "Subgraph not found, name: %s", name.c_str());
return FAILED;
}

std::string subgraph_name;
GE_CHK_STATUS_RET(func_desc->GetSubgraphNameByInstanceName(subgraph->GetName(), subgraph_name),
"Subgraph: %s get subgraph name failed.", subgraph->GetName().c_str());

auto graph = GraphUtils::CreateGraphFromComputeGraph(subgraph);
Status ret = FAILED;
if (post_func != nullptr) {
ret = post_func(subgraph_name, graph);
} else if (parse_func_v2 != nullptr) {
ret = parse_func_v2(subgraph_name.c_str(), graph);
}
if (ret != SUCCESS) {
GELOGE(FAILED, "Failed to post-process subgraph %s on node %s type %s", graph.GetName().c_str(),
case_node_->GetName().c_str(), case_node_->GetType().c_str());
Status MultiBatchClonePass::UpdateSubgraphOutput(const NodePtr &output_node) {
const auto &op_desc = output_node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (size_t index = 0; index < op_desc->GetInputsSize(); ++index) {
GeTensorDescPtr tensor = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL(tensor);
if (!AttrUtils::SetInt(tensor, ATTR_NAME_PARENT_NODE_INDEX, index)) {
GELOGE(FAILED, "Failed to set parent index for node %s", output_node->GetName().c_str());
return FAILED;
}
}


+ 11
- 11
ge/graph/passes/multi_batch_clone_pass.h View File

@@ -105,12 +105,20 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Set shape to Data node in branch.
/// @param [in] const NodePtr &data: data in branch.
/// @brief Update Data node in Subgraph.
/// @param [in] const NodePtr &data: data in Subgraph.
/// @param [in] size_t index: The batch index.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateShapeToData(const NodePtr &data, size_t index);
Status UpdateSubgraphData(const NodePtr &data, size_t index);

///
/// @ingroup ge
/// @brief Update output_node in Subgraph.
/// @param [in] const NodePtr &output_node: output_node in Subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status UpdateSubgraphOutput(const NodePtr &output_node);

///
/// @ingroup ge
@@ -133,14 +141,6 @@ class MultiBatchClonePass : public GraphPass {

///
/// @ingroup ge
/// @brief Assign parent index for branches.
/// @param [in] const ComputeGraphPtr &graph: Root/Case graph.
/// @return 0: SUCCESS / others: FAILED
///
Status PostProcSubgraph(const ComputeGraphPtr &graph);

///
/// @ingroup ge
/// @brief Remove subgraph supend output anchor.
/// @param [in] ComputeGraphPtr &graph: Parent compute graph.
/// @return 0: SUCCESS / others: FAILED


+ 231
- 315
ge/graph/passes/subgraph_const_migration_pass.cc View File

@@ -20,11 +20,12 @@
#include "graph/passes/folding_pass.h"

namespace ge {
constexpr uint32_t kDataOutIndex = 0;
constexpr uint32_t kZeroIndex = 0;
constexpr uint32_t kCaseInputBase = 1;
constexpr uint32_t kInvalidParent = 0x7fffffffU;
const string kMbatchNodeNameMark = "_ascend_mbatch_batch_";

bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
bool IsSameConstNode(const NodePtr &src_node, const NodePtr &dst_node) {
if ((src_node == nullptr) && (dst_node == nullptr)) {
return true;
}
@@ -37,35 +38,9 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
return false;
}

if ((src_node->GetInControlNodes().size() != dst_node->GetInControlNodes().size()) ||
(src_node->GetOutDataNodesSize() != dst_node->GetOutDataNodesSize())) {
return false;
}

set<uint32_t> related_parent;
const auto in_nodes = src_node->GetInControlNodes();
for (uint32_t i = 0; i < in_nodes.size(); ++i) {
const auto owner_node = in_nodes.at(i);
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return false;
}

related_parent.insert(parent_index);
}

for (const auto &in_node : dst_node->GetInControlNodes()) {
uint32_t parent_index = 0;
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return false;
}

if (related_parent.count(parent_index) == 0) {
return false;
}
}

return true;
const GeTensorDesc &src_desc = src_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
const GeTensorDesc &dst_desc = dst_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
return (src_desc == dst_desc);
}

/***********************************************************************************************************************
@@ -89,12 +64,12 @@ bool IsSameOpNode(const NodePtr &src_node, const NodePtr &dst_node) {
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+
| Data | | Data | | Data | | Data | | Data | | Data | | Conv2D |
+-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ +-----------+
\ \ | / / | |
\ \ | / / | |
\ \ | / / | |
\ \ | / / | |
\ +-----------+ / | +-----------+
+---------------| Const |----------------+ | | Pooling |
\ \ | / / | | +-----------+
\ \ | / / | | | Const |
\ \ | / / | | +-----------+
\ \ | / / | | /
\ +-----------+ / | +-----------+ /
+---------------| Const |----------------+ | | Pooling |-----+
+-----------+ | +-----------+
\ | /
\ | /
@@ -126,28 +101,26 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) {
continue;
}

do {
migration_append_ = false;
map<ComputeGraphPtr, map<uint32_t, NodePtr>> graph_datas;
if (ClassifyDataNodes(graph, func_desc, graph_datas) != SUCCESS) {
return FAILED;
}
map<ComputeGraphPtr, map<string, NodePtr>> all_const_nodes;
map<ComputeGraphPtr, map<uint32_t, NodePtr>> all_data_nodes;
if (ClassifyGraphNodes(graph, func_desc, all_const_nodes, all_data_nodes) != SUCCESS) {
return FAILED;
}

if (graph_datas.empty()) {
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str());
break;
}
if (all_const_nodes.empty()) {
GELOGW("Graph: %s subgraph is empty", graph->GetName().c_str());
break;
}

// {subgraph0, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
// {subgraph1, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
// {subgraph2, {{1, Data}, {2, Data}, {3, Data}, {4, Data}, ..., {n, Data}}}
const auto base_nodes = graph_datas.begin()->second; // Need copy.
for (const auto &node_item : base_nodes) {
if (GraphNodeMigration(graph, node, graph_datas, node_item.second, node_item.first) != SUCCESS) {
return FAILED;
}
// {subgraph0, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
// {subgraph1, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
// {subgraph2, {{key1, Const}, {key2, Const}, {key3, Const}, {key4, Const}, ..., {keyn, Const}}}
const auto &const_nodes = all_const_nodes.begin()->second;
for (const auto &item : const_nodes) {
if (GraphNodeMigration(graph, node, all_const_nodes, all_data_nodes, item.second, item.first) != SUCCESS) {
return FAILED;
}
} while (migration_append_);
}
}

return SUCCESS;
@@ -155,14 +128,16 @@ Status SubgraphConstMigrationPass::Run(ComputeGraphPtr graph) {

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @brief Get all Const/Data nodes for all subgraph.
/// @param [in] graph: Root compute graph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_datas: Data groups of subgraph.
/// @param [out] all_const_nodes: Const groups of subgraph.
/// @param [out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas) {
Status SubgraphConstMigrationPass::ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) {
for (const auto &name : func_desc->GetSubgraphInstanceNames()) {
const auto &subgraph = graph->GetSubgraph(name);
if (subgraph == nullptr) {
@@ -170,32 +145,47 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap
return GE_GRAPH_EMPTY_SUBGRAPH;
}

auto &data_nodes = graph_datas[subgraph];
for (auto &data : subgraph->GetDirectNode()) {
if (data->GetType() != DATA) {
continue;
}
auto &data_nodes = all_data_nodes[subgraph];
auto &const_nodes = all_const_nodes[subgraph];
for (auto &node : subgraph->GetDirectNode()) {
if (node->GetType() == DATA) {
uint32_t parent_index = kInvalidParent;
if (!AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
return FAILED;
}

uint32_t parent_index = 0;
if (!AttrUtils::GetInt(data->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", data->GetName().c_str());
return FAILED;
}
data_nodes[parent_index] = node;
GELOGD("%s, index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, node->GetName().c_str());
} else if ((node->GetType() == CONSTANT) && (node->GetOutDataAnchor(kZeroIndex) != nullptr)) {
set<string> peer_name_list;
const auto &out_anchor = node->GetOutDataAnchor(kZeroIndex);
for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
const auto &peer_node = in_anchor->GetOwnerNode();
// Trim subgraph node name prefix.
string node_full_name = peer_node->GetName();
size_t pos = node_full_name.find(kMbatchNodeNameMark);
if (pos == string::npos) {
GELOGE(FAILED, "find: %s of multi-batch in node: %s", kMbatchNodeNameMark.c_str(), node_full_name.c_str());
return FAILED;
}

string fixed_name = node_full_name.substr(0, pos);
pos = node_full_name.find("_", pos + kMbatchNodeNameMark.length());
if (pos != string::npos) {
fixed_name += node_full_name.substr(pos);
}

peer_name_list.insert(fixed_name + ":" + std::to_string(in_anchor->GetIdx()));
}

data_nodes[parent_index] = data;
GELOGD("%s, Parent index: %u, Data: %s", subgraph->GetName().c_str(), parent_index, data->GetName().c_str());
}
}
string key_of_const;
for (const string &name : peer_name_list) {
key_of_const += (key_of_const.empty() ? name : "_" + name);
}

auto iter = graph_datas.begin();
if (iter == graph_datas.end()) {
return SUCCESS;
}
for (const auto &data_nodes : graph_datas) {
if (data_nodes.second.size() != iter->second.size()) {
GELOGE(FAILED, "Subgraph %s has invalid Data nodes[%zu != %zu]",
data_nodes.first->GetName().c_str(), data_nodes.second.size(), iter->second.size());
return FAILED;
const_nodes[key_of_const] = node;
GELOGD("%s, Key: %s, Const: %s", subgraph->GetName().c_str(), key_of_const.c_str(), node->GetName().c_str());
}
}
}

@@ -204,36 +194,27 @@ Status SubgraphConstMigrationPass::ClassifyDataNodes(const ComputeGraphPtr &grap

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] node: Const node of subgraph.
/// @param [out] inputs: parent index to Const.
/// @param [out] outputs: Data groups of subgraph.
/// @brief Get parent_index for Const node migration.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const node will process.
/// @param [out] parent_index: parent index for replace Data.
/// @return true: SUCCESS / false: FAILED
///
bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uint32_t, uint32_t> &inputs,
map<uint32_t, uint32_t> &outputs) {
for (uint32_t i = 0; i < node->GetAllOutDataAnchorsSize(); ++i) {
outputs[i] = kInvalidParent;
}

uint32_t out_index = 0;
const auto in_nodes = node->GetInAllNodes();
for (size_t i = 0; i < in_nodes.size(); ++i) {
const auto owner_node = in_nodes.at(i);
if (owner_node->GetType() != DATA) {
bool SubgraphConstMigrationPass::GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, uint32_t &parent_index) {
for (const auto in_node : const_node->GetInAllNodes()) {
if (in_node->GetType() != DATA) {
return false;
}

uint32_t parent_index = 0;
if (!AttrUtils::GetInt(owner_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
uint32_t node_index = 0;
if (!AttrUtils::GetInt(in_node->GetOpDesc(), ATTR_NAME_PARENT_NODE_INDEX, node_index)) {
return false;
}

// Input Data feed other Node, need add new Data.
inputs[i] = parent_index;
if ((out_index == outputs.size()) && owner_node->GetOutDataNodes().empty()) {
outputs[out_index] = parent_index;
++out_index;
if ((parent_index == kInvalidParent) && in_node->GetOutDataNodes().empty()) {
parent_index = node_index;
}
}

@@ -242,43 +223,26 @@ bool SubgraphConstMigrationPass::GetAssociatedNodes(const NodePtr &node, map<uin

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] data_idx: Data groups of subgraph.
/// @brief Check parallel node is same for all subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node.
/// @return true: Same / false: not same
///
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &const_node, uint32_t parent_index, size_t index) {
auto it = graph_datas.begin();
for (++it; it != graph_datas.end(); ++it) {
const auto &data_nodes = it->second;
auto data_it = data_nodes.find(parent_index);
if (data_it == data_nodes.end()) {
GELOGE(FAILED, "Data: %s not fount, index: %u", const_node->GetName().c_str(), parent_index);
return false;
}

const auto &work_data = data_it->second;
const auto &out_anchor = work_data->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) {
GELOGW("Node anchors not same, Data: %s -> %s anchor size: %zu, index: %zu",
work_data->GetName().c_str(), const_node->GetName().c_str(), in_anchors.size(), index);
return false;
}

const auto &in_anchor = in_anchors.at(index);
const auto &work_node = in_anchor->GetOwnerNode();
if (work_node == nullptr) {
GELOGE(FAILED, "Data: %s not found, parent: %u, index: %zu", const_node->GetName().c_str(), parent_index, index);
bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const NodePtr &const_node, const string &node_key) {
auto it = all_const_nodes.begin();
for (++it; it != all_const_nodes.end(); ++it) {
const auto &const_nodes = it->second;
auto node_it = const_nodes.find(node_key);
if (node_it == const_nodes.end()) {
GELOGW("Const node: %s not fount, key: %s", const_node->GetName().c_str(), node_key.c_str());
return false;
}

if (!IsSameOpNode(const_node, work_node)) {
GELOGI("OpDesc not same: %s %s, parent: %u, index: %zu",
const_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, index);
const auto &work_node = node_it->second;
if (!IsSameConstNode(const_node, work_node)) {
GELOGI("Not same: %s %s, key: %s", const_node->GetName().c_str(), work_node->GetName().c_str(), node_key.c_str());
return false;
}
}
@@ -291,51 +255,34 @@ bool SubgraphConstMigrationPass::IsParallelNodeSame(const map<ComputeGraphPtr, m
/// @brief Migration subgraph Node to Root
/// @param [in] graph: Root compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node for migration.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &data_node, uint32_t parent_index) {
bool can_extrapolation = false;
do {
can_extrapolation = false;
const auto &out_anchor = data_node->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
for (size_t i = in_anchors.size(); i > 0; --i) {
const auto &in_anchor = in_anchors.at(i - 1);
const auto &work_node = in_anchor->GetOwnerNode();
GELOGD("Data: %s, node: %s, parent: %u, index: %zu",
data_node->GetName().c_str(), work_node->GetName().c_str(), parent_index, i);
if (work_node->GetType() != CONSTANT) {
continue;
}

// Get associated Data, if Data feed other nodes, need append new Data.
map<uint32_t, uint32_t> inputs;
map<uint32_t, uint32_t> outputs;
if (!GetAssociatedNodes(work_node, inputs, outputs)) {
continue;
}
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, const string &node_key) {
if (!IsParallelNodeSame(all_const_nodes, const_node, node_key)) {
return SUCCESS;
}

if (!IsParallelNodeSame(graph_datas, work_node, parent_index, i - 1)) {
continue;
}
// Get associated Data, if Data feed other nodes, need append new Data.
uint32_t parent_index = kInvalidParent;
if (!GetAssociatedNodes(all_data_nodes, const_node, parent_index)) {
return SUCCESS;
}

GELOGI("Move node: %s, parent: %u, index: %zu", work_node->GetName().c_str(), parent_index, i);
if (AppendParallelNode(graph_datas, func_node, outputs) != SUCCESS) {
return FAILED;
}
GELOGI("Move node: %s, parent index: %u", const_node->GetName().c_str(), parent_index);
if (AppendParallelNode(func_node, parent_index, all_data_nodes) != SUCCESS) {
return FAILED;
}

if (MoveNodeToParent(graph, func_node, graph_datas, parent_index, i - 1, inputs, outputs) != SUCCESS) {
return FAILED;
}
can_extrapolation = true;
break;
}
} while (can_extrapolation);
if (MoveNodeToParent(graph, func_node, all_const_nodes, all_data_nodes, node_key, parent_index) != SUCCESS) {
return FAILED;
}

return SUCCESS;
}
@@ -343,114 +290,100 @@ Status SubgraphConstMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
///
/// @ingroup ge
/// @brief Append Input Tensor for functional node.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] outputs: Parent index of Node output.
/// @param [in/out] parent_index: Parent index for migration.
/// @param [in/out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::AppendParallelNode(map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
const NodePtr &func_node, map<uint32_t, uint32_t> &outputs) {
Status SubgraphConstMigrationPass::AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes) {
// If outputs index invalid, add Data and Input Tensor.
for (auto &item : outputs) {
if (item.second != kInvalidParent) {
continue;
}

// Add Data to subgraph.
map<ComputeGraphPtr, uint32_t> append_num;
for (auto &groups : graph_datas) {
const auto &subgraph = groups.first;
auto &data_nodes = groups.second;

item.second = func_node->GetAllInDataAnchorsSize() + append_num[subgraph]; // Update to valid parent index.
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(item.second);

OpDescBuilder op_builder(data_name, DATA);
const OpDescPtr op_desc = op_builder.AddInput("x").AddOutput("y").Build();
if (op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed");
return OUT_OF_MEMORY;
}
if (parent_index != kInvalidParent) {
return SUCCESS;
}

uint32_t data_index = item.second - kCaseInputBase;
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}
// Add Data to subgraph.
parent_index = func_node->GetAllInDataAnchorsSize(); // Update to valid parent index.
for (auto &item : all_data_nodes) {
const auto &subgraph = item.first;
const auto data_name = subgraph->GetName() + "_data_" + std::to_string(parent_index);
OpDescBuilder op_builder(data_name, DATA);
const auto op_desc = op_builder.AddInput("x").AddOutput("y").Build();
if (op_desc == nullptr) {
GELOGE(OUT_OF_MEMORY, "Create multi-batch subgraph data desc failed");
return OUT_OF_MEMORY;
}

if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, item.second)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}
uint32_t data_index = parent_index - kCaseInputBase;
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}

append_num[subgraph]++;
data_nodes[item.second] = subgraph->AddNode(op_desc);
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), item.second);
if (!AttrUtils::SetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) {
GELOGE(FAILED, "Parent index not found, name: %s", op_desc->GetName().c_str());
return FAILED;
}

// Add InputTensor to functional Node.
NodeUtils::AppendInputAnchor(func_node, item.second + 1);
item.second[parent_index] = subgraph->AddNode(op_desc);
GELOGI("Add Node: %s, parent index: %u", op_desc->GetName().c_str(), parent_index);
}

// Add InputTensor to functional Node.
NodeUtils::AppendInputAnchor(func_node, parent_index + 1);
return SUCCESS;
}

///
/// @ingroup ge
/// @brief Delete Node from all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] detach: Node will move to parent.
/// @param [in] outputs: Parent index of Node output.
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::DetachParallelNode(const map<uint32_t, NodePtr> &graph_datas, const NodePtr &detach,
const map<uint32_t, uint32_t> &outputs) {
Status SubgraphConstMigrationPass::DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node,
const NodePtr &data_node) {
// Break Data and Move node.
const auto &in_anchor = detach->GetInControlAnchor();
const auto &out_anchors = in_anchor->GetPeerOutControlAnchors();
for (size_t i = out_anchors.size(); i > 0; --i) {
const auto &out_anchor = out_anchors.at(i - 1);
const auto &in_anchor = const_node->GetInControlAnchor();
const auto out_anchors = in_anchor->GetPeerOutControlAnchors();
for (const auto out_anchor : out_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), detach->GetName().c_str());
}

// Break Move and follow, Link Data and follow.
for (uint32_t i = 0; i < detach->GetAllOutDataAnchorsSize(); ++i) {
auto it_idx = outputs.find(i);
if (it_idx == outputs.end()) {
GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i);
return FAILED;
}

auto it_data = graph_datas.find(it_idx->second);
if (it_data == graph_datas.end()) {
GELOGE(FAILED, "Node: %s parent index %u not found", detach->GetName().c_str(), i);
return FAILED;
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), const_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty() && owner_node != data_node) {
graph->RemoveNode(owner_node);
}
}

const auto &data_node = it_data->second;
const auto &out_anchor = detach->GetOutDataAnchor(i);
const auto &ctrl_anchor = const_node->GetOutControlAnchor();
const auto ctrl_anchors = ctrl_anchor->GetPeerInControlAnchors();
for (const auto in_anchor : ctrl_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(ctrl_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

const auto &out_desc = detach->GetOpDesc()->GetOutputDesc(i);
const auto &data_desc = data_node->GetOpDesc();
(void)data_desc->UpdateInputDesc(kDataOutIndex, out_desc); // Set Data Input to new connect Node.
(void)data_desc->UpdateOutputDesc(kDataOutIndex, out_desc); // Set Data Output to new connect Node.
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutControlAnchor(), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
}

for (const auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
if (in_anchor == nullptr) {
continue;
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = in_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", detach->GetName().c_str(), owner_node->GetName().c_str());
// Break Move and follow, Link Data and follow.
const auto &out_anchor = const_node->GetOutDataAnchor(kZeroIndex);
const auto in_anchors =out_anchor->GetPeerInDataAnchors();
for (const auto in_anchor : in_anchors) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
GELOGI("Remove Edge: %s %s", const_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());

const auto &data_out_anchor = data_node->GetOutDataAnchor(kDataOutIndex);
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_out_anchor, in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), owner_node->GetName().c_str());
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(data_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", data_node->GetName().c_str(), in_anchor->GetOwnerNode()->GetName().c_str());
}

// Update Data op DataType.
const auto &const_desc = const_node->GetOpDesc();
const auto &tensor_desc = const_desc->GetOutputDesc(kZeroIndex);
const auto &data_desc = data_node->GetOpDesc();
(void)data_desc->UpdateInputDesc(kZeroIndex, tensor_desc); // Set Data Input to new connect Node.
(void)data_desc->UpdateOutputDesc(kZeroIndex, tensor_desc); // Set Data Output to new connect Node.

return SUCCESS;
}

@@ -459,47 +392,37 @@ Status SubgraphConstMigrationPass::DetachParallelNode(const map<uint32_t, NodePt
/// @brief Move Node to Parent Graph.
/// @param [in] graph: Parent compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] attach: Node will move to parent.
/// @param [in] inputs: Parent index of Node input.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
const NodePtr &attach, const map<uint32_t, uint32_t> &inputs,
const map<uint32_t, uint32_t> &outputs) {
GE_CHECK_NOTNULL(attach);
for (const auto item : inputs) {
if (item.second == kInvalidParent) { // Not connect, Skip.
continue;
}

const auto &in_anchor = func_node->GetInDataAnchor(item.second);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
const auto &owner_node = out_anchor->GetOwnerNode();
const auto &in_control = attach->GetInControlAnchor();
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(owner_node->GetOutControlAnchor(), in_control), "Add edge failed");
GELOGI("Add Edge: %s %s", owner_node->GetName().c_str(), attach->GetName().c_str());
const NodePtr &const_node, uint32_t parent_index) {
GE_CHECK_NOTNULL(const_node);
if (parent_index == kInvalidParent) {
return INTERNAL_ERROR;
}

for (const auto &item : outputs) {
const auto &func_desc = func_node->GetOpDesc();
const auto &out_desc = attach->GetOpDesc()->GetOutputDesc(item.second);
(void)func_desc->UpdateInputDesc(item.second, out_desc); // Set Data Input to new connect Node.

const auto &in_anchor = func_node->GetInDataAnchor(item.second);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
if (out_anchor != nullptr) {
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto &owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
const auto &func_desc = func_node->GetOpDesc();
const auto &tensor_desc = const_node->GetOpDesc()->GetOutputDesc(kZeroIndex);
(void)func_desc->UpdateInputDesc(parent_index, tensor_desc); // Set Data Input to new connect Node.

const auto &in_anchor = func_node->GetInDataAnchor(parent_index);
const auto &out_anchor = in_anchor->GetPeerOutAnchor();
if (out_anchor != nullptr) { // Break useless old link.
GE_CHK_GRAPH_STATUS_RET(GraphUtils::RemoveEdge(out_anchor, in_anchor), "Remove edge failed");
const auto owner_node = out_anchor->GetOwnerNode();
GELOGI("Remove Edge: %s %s", owner_node->GetName().c_str(), func_node->GetName().c_str());
if (owner_node->GetInAllNodes().empty() && owner_node->GetOutAllNodes().empty()) {
graph->RemoveNode(owner_node);
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(attach->GetOutDataAnchor(item.first), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s", attach->GetName().c_str(), func_node->GetName().c_str());
}
GE_CHK_GRAPH_STATUS_RET(GraphUtils::AddEdge(const_node->GetOutDataAnchor(kZeroIndex), in_anchor), "Add edge failed");
GELOGI("Add Edge: %s %s, index: %u", const_node->GetName().c_str(), func_node->GetName().c_str(), parent_index);

(void)graph->AddNode(attach);
(void)attach->SetOwnerComputeGraph(graph);
GELOGI("Add Node: %s %s", graph->GetName().c_str(), attach->GetName().c_str());
(void)graph->AddNode(const_node);
(void)const_node->SetOwnerComputeGraph(graph);
GELOGI("Add Node: %s %s", graph->GetName().c_str(), const_node->GetName().c_str());
return SUCCESS;
}

@@ -515,43 +438,37 @@ Status SubgraphConstMigrationPass::AttachParallelNode(const ComputeGraphPtr &gra
/// @return 0: SUCCESS / others: FAILED
///
Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas,
uint32_t parent_index, uint32_t index,
const map<uint32_t, uint32_t> &inputs,
const map<uint32_t, uint32_t> &outputs) {
if (inputs.empty()) {
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const string &node_key, uint32_t parent_index) {
if (node_key.empty() || parent_index == kInvalidParent) {
GELOGE(FAILED, "Graph: %s, inputs is empty", graph->GetName().c_str());
return FAILED;
}

NodePtr move_node;
for (auto &groups : graph_datas) {
const auto &subgraph = groups.first;
const auto &data_nodes = groups.second;
auto it = data_nodes.find(parent_index);
if (it == data_nodes.end()) {
GELOGE(FAILED, "Graph: %s, Data: %u node not found", subgraph->GetName().c_str(), parent_index);
for (auto &item : all_const_nodes) {
const auto &subgraph = item.first;
const auto it_const = item.second.find(node_key);
if (it_const == item.second.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}
move_node = it_const->second;

const auto &base_data = it->second;
const auto &out_anchor = base_data->GetOutControlAnchor();
const auto &in_anchors = out_anchor->GetPeerInControlAnchors();
if (in_anchors.size() <= index || in_anchors.at(index) == nullptr) {
GELOGE(FAILED, "Data: %s, anchor size: %zu, index: %u not found",
base_data->GetName().c_str(), in_anchors.size(), index);
const auto it_nodes = all_data_nodes.find(subgraph);
if (it_nodes == all_data_nodes.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}

const auto &in_anchor = in_anchors.at(index);
move_node = in_anchor->GetOwnerNode();
if (move_node == nullptr) {
GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index);
const auto it_data = it_nodes->second.find(parent_index);
if (it_data == it_nodes->second.end()) {
GELOGE(FAILED, "Graph: %s, Const: %s node not found", subgraph->GetName().c_str(), node_key.c_str());
return FAILED;
}

if (DetachParallelNode(data_nodes, move_node, outputs) != SUCCESS) {
GELOGE(FAILED, "Data: %s not found, index: %u", base_data->GetName().c_str(), parent_index);
if (DetachParallelNode(subgraph, move_node, it_data->second) != SUCCESS) {
GELOGE(FAILED, "Data: %s not found, index: %u", move_node->GetName().c_str(), parent_index);
return FAILED;
}

@@ -559,11 +476,10 @@ Status SubgraphConstMigrationPass::MoveNodeToParent(const ComputeGraphPtr &graph
GELOGI("Remove Node: %s %s", subgraph->GetName().c_str(), move_node->GetName().c_str());
}

if (AttachParallelNode(graph, func_node, move_node, inputs, outputs) != SUCCESS) {
if (AttachParallelNode(graph, func_node, move_node, parent_index) != SUCCESS) {
return FAILED;
}

migration_append_ = true;
return SUCCESS;
}
} // namespace ge

+ 42
- 42
ge/graph/passes/subgraph_const_migration_pass.h View File

@@ -36,50 +36,54 @@ class SubgraphConstMigrationPass : public GraphPass {
private:
///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @brief Get all Const/Data nodes for all subgraph.
/// @param [in] graph: Root compute graph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_datas: Data groups of subgraph.
/// @param [out] all_const_nodes: Const groups of subgraph.
/// @param [out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status ClassifyDataNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_datas);
Status ClassifyGraphNodes(const ComputeGraphPtr &graph, const OpDescPtr &func_desc,
map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] node: Const node of subgraph.
/// @param [in] func_desc: functional OpDesc of Case.
/// @param [out] graph_nodes: Data groups of subgraph.
/// @brief Get parent_index for Const node migration.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const node will process.
/// @param [out] parent_index: parent index for replace Data.
/// @return true: SUCCESS / false: FAILED
///
bool GetAssociatedNodes(const NodePtr &node, map<uint32_t, uint32_t> &inputs, map<uint32_t, uint32_t> &outputs);
bool GetAssociatedNodes(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, uint32_t &parent_index);

///
/// @ingroup ge
/// @brief Get all Data nodes for all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] data_idx: Data groups of subgraph.
/// @brief Check parallel node is same for all subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node.
/// @return true: Same / false: not same
///
bool IsParallelNodeSame(const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &const_node, uint32_t parent_index, size_t index);
bool IsParallelNodeSame(const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const NodePtr &const_node, const string &node_key);

///
/// @ingroup ge
/// @brief Migration subgraph Node to Root
/// @param [in] graph: Root compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] data_base: Data Node for migration.
/// @param [in] data_idx: Data groups of subgraph.
/// @param [in] all_const_nodes: Const groups of subgraph.
/// @param [in] all_data_nodes: Data groups of subgraph.
/// @param [in] const_node: Const Node for migration.
/// @param [in] node_key: Key of Const node for migration.
/// @return 0: SUCCESS / others: FAILED
///
Status GraphNodeMigration(const ComputeGraphPtr &graph, const NodePtr &func_node,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &data_base, uint32_t data_idx);
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const NodePtr &const_node, const string &node_key);

///
/// @ingroup ge
@@ -93,46 +97,42 @@ class SubgraphConstMigrationPass : public GraphPass {
/// @return 0: SUCCESS / others: FAILED
///
Status MoveNodeToParent(const ComputeGraphPtr &graph, const NodePtr &func_node,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
uint32_t parent_index, uint32_t anchor_idx,
const map<uint32_t, uint32_t> &inputs, const map<uint32_t, uint32_t> &outputs);
const map<ComputeGraphPtr, map<string, NodePtr>> &all_const_nodes,
const map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes,
const string &node_key, uint32_t parent_index);

///
/// @ingroup ge
/// @brief Append Input Tensor for functional node.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] graph_nodes: Const groups of subgraph.
/// @param [in/out] parent_index: Parent index for migration.
/// @param [in/out] all_data_nodes: Data groups of subgraph.
/// @return 0: SUCCESS / others: FAILED
///
Status AppendParallelNode(map<ComputeGraphPtr, map<uint32_t, NodePtr>> &graph_nodes,
const NodePtr &func_node, map<uint32_t, uint32_t> &outputs);
Status AppendParallelNode(const NodePtr &func_node, uint32_t &parent_index,
map<ComputeGraphPtr, map<uint32_t, NodePtr>> &all_data_nodes);

///
/// @ingroup ge
/// @brief Delete Node from all subgraph.
/// @param [in] graph_nodes: Data groups of subgraph.
/// @param [in] detach: Node will move to parent.
/// @param [in] outputs: Parent index of Node output.
/// @brief Delete Node from subgraph.
/// @param [in] graph: subgraph for process.
/// @param [in] const_node: Node will move to parent.
/// @param [in] data_node: Place holder for Const.
/// @return 0: SUCCESS / others: FAILED
///
Status DetachParallelNode(const map<uint32_t, NodePtr> &graph_datas, const NodePtr &detach,
const map<uint32_t, uint32_t> &outputs);
Status DetachParallelNode(const ComputeGraphPtr &graph, const NodePtr &const_node, const NodePtr &data_node);

///
/// @ingroup ge
/// @brief Move Node to Parent Graph.
/// @param [in] graph: Parent compute graph.
/// @param [in] func_node: functional Node of Case.
/// @param [in] attach: Node will move to parent.
/// @param [in] inputs: Parent index of Node input.
/// @param [in] outputs: Parent index of Node output.
/// @param [in] const_node: Node will move to parent.
/// @param [in] parent_index: Parent index of Node input.
/// @return 0: SUCCESS / others: FAILED
///
Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node, const NodePtr &attach,
const map<uint32_t, uint32_t> &inputs, const map<uint32_t, uint32_t> &outputs);

bool migration_append_{false};
Status AttachParallelNode(const ComputeGraphPtr &graph, const NodePtr &func_node,
const NodePtr &const_node, uint32_t parent_index);
};
} // namespace ge
#endif // GE_COMMON_SUBGRAPH_CONST_MIGRATION_H_

+ 8
- 4
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -17,8 +17,13 @@
#include "graph/passes/switch_to_stream_switch_pass.h"
#include <stack>
#include "common/ge/ge_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/debug/log.h"
#include "framework/common/ge_inner_error_codes.h"
#include "framework/common/types.h"
#include "ge/ge_api_types.h"
#include "graph/common/omg_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/utils/type_utils.h"

@@ -120,13 +125,12 @@ void SwitchToStreamSwitchPass::MarkCycleDependence(
if (visited.count(tmp_node) > 0) {
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str());
for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) {
if (switch_nodes.find(out_node) == switch_nodes.end()) {
out_nodes.push(out_node);
continue;
}
GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.",
tmp_node->GetName().c_str(), out_node->GetName().c_str());
GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS,
GELOGW("set cyclic dependence attr failed."); return );
auto map_iter = switch_cyclic_map_.find(out_node);
@@ -598,7 +602,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons
///
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node,
const std::set<NodePtr> &same_cond_switch) {
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(),
cast_node->GetName().c_str());
std::string orig_switch_name = switch_node->GetName();
OpDescPtr switch_desc = switch_node->GetOpDesc();
@@ -649,7 +653,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no
///
Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch,
const NodePtr &active_node) {
GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(),
stream_switch->GetName().c_str(), active_node->GetName().c_str());
auto find_res = switch_node_map_.find(switch_node);
GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), {


+ 44
- 0
ge/graph/preprocess/graph_preprocess.cc View File

@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <string>
#include <utility>
#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
#include "common/formats/format_transfers/format_transfer_fractal_z.h"
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
@@ -27,9 +28,13 @@
#include "common/helper/model_helper.h"
#include "common/math/math_util.h"
#include "common/op/ge_op_utils.h"
#include "common/util/error_manager/error_manager.h"
#include "common/formats/utils/formats_trans_utils.h"
#include "framework/common/debug/ge_log.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/common/local_context.h"
#include "graph/common/transop_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/ge_context.h"
#include "graph/shape_refiner.h"
#include "graph/manager/graph_var_manager.h"
@@ -39,21 +44,29 @@
#include "graph/passes/aicpu_constant_folding_pass.h"
#include "graph/passes/assert_pass.h"
#include "graph/passes/assign_pass.h"
#include "graph/passes/base_pass.h"
#include "graph/passes/common_subexpression_elimination_pass.h"
#include "graph/passes/cond_pass.h"
#include "graph/passes/cond_remove_pass.h"
#include "graph/passes/constant_folding_pass.h"
#include "graph/passes/constant_fuse_same_pass.h"
#include "graph/passes/control_trigger_pass.h"
#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/dimension_compute_pass.h"
#include "graph/passes/dropout_pass.h"
#include "graph/passes/enter_pass.h"
#include "graph/passes/flow_ctrl_pass.h"
#include "graph/passes/for_pass.h"
#include "graph/passes/get_original_format_pass.h"
#include "graph/passes/guarantee_const_pass.h"
#include "graph/passes/hccl_group_pass.h"
#include "graph/passes/hccl_memcpy_pass.h"
#include "graph/passes/identity_pass.h"
#include "graph/passes/infershape_pass.h"
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/net_output_pass.h"
#include "graph/passes/next_iteration_pass.h"
#include "graph/passes/no_use_reshape_remove_pass.h"
#include "graph/passes/parallel_concat_start_op_pass.h"
#include "graph/passes/placeholder_with_default_pass.h"
@@ -68,18 +81,45 @@
#include "graph/passes/shape_operate_op_remove_pass.h"
#include "graph/passes/snapshot_pass.h"
#include "graph/passes/stop_gradient_pass.h"
#include "graph/passes/subgraph_pass.h"
#include "graph/passes/switch_data_edges_bypass.h"
#include "graph/passes/switch_dead_branch_elimination.h"
#include "graph/passes/switch_logic_remove_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
#include "graph/passes/switch_to_stream_switch_pass.h"
#include "graph/passes/attach_stream_label_pass.h"
#include "graph/passes/unused_const_pass.h"
#include "graph/passes/unused_op_remove_pass.h"
#include "graph/passes/var_is_initialized_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/preprocess/insert_op/util_insert_aipp_op.h"
#include "graph/types.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "inc/pass_manager.h"
#include "init/gelib.h"
#include "multi_batch_copy_graph.h"
#include "runtime/dev.h"

#include "graph/passes/dimension_adjust_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/permute_pass.h"
#include "graph/passes/reshape_remove_pass.h"
#include "graph/passes/same_transdata_breadth_fusion_pass.h"
#include "graph/passes/transop_breadth_fusion_pass.h"
#include "graph/passes/transop_depth_fusion_pass.h"
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h"

#include "graph/passes/cast_remove_pass.h"
#include "graph/passes/data_pass.h"
#include "graph/passes/transop_without_reshape_fusion_pass.h"
#include "graph/passes/transpose_transdata_pass.h"
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"


namespace ge {
namespace {
static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
@@ -1606,6 +1646,10 @@ Status GraphPrepare::InferShapeForPreprocess() {
if (!options_.train_graph_flag) {
names_to_passes.emplace_back("AssertPass", &assert_pass);
}
SwitchDeadBranchElimination switch_dead_branch_elimination;
names_to_passes.emplace_back("SwitchDeadBranchElimination", &switch_dead_branch_elimination);
MergePass merge_pass;
names_to_passes.emplace_back("MergePass", &merge_pass);
InferShapePass infer_shape_pass;
names_to_passes.emplace_back("InferShapePass", &infer_shape_pass);
ReplaceWithEmptyConstPass replace_with_empty_const_pass;


+ 6
- 7
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -1692,14 +1692,13 @@ Status MultiBatchGraphCopyer::LinkToNodeOutBranch(const NodePtr &node) {
}

Status ProcessMultiBatch(ComputeGraphPtr &graph) {
if (GetLocalOmgContext().dynamic_node_type.empty()) {
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN");
if (multi_batch_with_switchn == nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}
const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE");
if (multi_batch_with_case != nullptr) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass));
return pass_manager.Run(graph);
}

if (!GetLocalOmgContext().need_multi_batch) {
GELOGI("No need to process_multi for no_train graph.");
return SUCCESS;


+ 10
- 1
ge/host_kernels/dynamic_stitch_kernel.cc View File

@@ -33,6 +33,8 @@ namespace {
const int kDoubleAttrN = 2;
const int kFirstOutputDescIdx = 0;
const int kMergedShapeSecondDim = 1;
const size_t kNullTensorDimNum = 1;
const int64_t kNullTensorDimValue = 0;
const std::set<DataType> kSupportedTypeSet = {DT_INT8, DT_UINT8, DT_INT16, DT_UINT16, DT_INT32,
DT_INT64, DT_BOOL, DT_FLOAT16, DT_FLOAT, DT_DOUBLE};
} // namespace
@@ -177,7 +179,14 @@ Status DynamicStitchKernel::StitchDataFollowIndices(int64_t data_unit, const vec
int64_t src_offset = 0;
std::set<int32_t> indices_set;
for (int i = 0; i < n_; i++) {
auto indices_shape_size = input[i]->GetTensorDesc().GetShape().GetShapeSize();
GeShape indices_shape = input[i]->GetTensorDesc().GetShape();
size_t indices_dim_num = indices_shape.GetDimNum();
// skip null indices tensor
if (indices_dim_num == kNullTensorDimNum && indices_shape.GetDim(0) == kNullTensorDimValue) {
GELOGD("Input indices[%d] has null tensor, skip it.", i);
continue;
}
auto indices_shape_size = indices_shape.GetShapeSize();
// to normalize logic, assume scalar as vector with shape of [1].
indices_shape_size = (indices_shape_size == 0) ? 1 : indices_shape_size;
// all index for input is less than size of input


+ 22
- 34
ge/hybrid/executor/node_state.cc View File

@@ -18,7 +18,6 @@
#include <chrono>
#include "framework/common/debug/log.h"
#include "graph/compute_graph.h"
#include "graph/utils/tensor_utils.h"
#include "hybrid_execution_context.h"
#include "subgraph_context.h"

@@ -36,31 +35,29 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
this->num_pending_shapes_);
}

Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) {
Status ShapeInferenceState::UpdateInputShape(int idx,
const GeShape &ori_shape,
const GeShape &shape) {
if (node_item.IsInputShapeStatic(idx)) {
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]",
node_item.NodeName().c_str(),
idx,
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(),
target.GetShape().ToString().c_str());
shape.ToString().c_str());
return SUCCESS;
}

int64_t tensor_size = -1;
(void) TensorUtils::GetSize(target, tensor_size);
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld",
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]",
node_item.NodeName().c_str(),
idx,
target.GetShape().ToString().c_str(),
target.GetOriginShape().ToString().c_str(),
tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());

std::lock_guard<std::mutex> lk(mu_);
auto tensor_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(tensor_desc);
tensor_desc->SetShape(target.GetShape());
tensor_desc->SetOriginShape(target.GetOriginShape());
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
tensor_desc->SetShape(shape);
tensor_desc->SetOriginShape(ori_shape);
if (--num_pending_shapes_ == 0) {
ready_cv_.notify_all();
}
@@ -113,24 +110,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex
for (auto &p : shape_futures) {
auto idx = p.first;
auto &future = p.second;
GeShape shape;
GeShape ori_shape;
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx);
auto src_tensor_desc = future.GetTensorDesc();
GE_CHECK_NOTNULL(src_tensor_desc);
GE_CHK_STATUS_RET(future.Get(ori_shape, shape),
"[%s] Get shape failed. index = %u",
node_item.NodeName().c_str(),
idx);
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx);

auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
int64_t tensor_size = -1;
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size);
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu",
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]",
node_item.NodeName().c_str(),
idx,
src_tensor_desc->GetShape().ToString().c_str(),
src_tensor_desc->GetOriginShape().ToString().c_str(),
tensor_size);
input_desc->SetShape(src_tensor_desc->GetShape());
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape());
(void) TensorUtils::SetSize(*input_desc, tensor_size);
shape.ToString().c_str(),
ori_shape.ToString().c_str());
auto input_desc = node_item.MutableInputDesc(idx);
GE_CHECK_NOTNULL(input_desc);
input_desc->SetShape(std::move(shape));
input_desc->SetOriginShape(ori_shape);
}

return SUCCESS;
@@ -193,14 +190,5 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) {
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str());
return SUCCESS;
}

GeTensorDescPtr ShapeFuture::GetTensorDesc() {
GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
if (!subgraph_context_->Await(src_node_)) {
GELOGE(INTERNAL_ERROR, "cancelled");
return nullptr;
}
return src_node_->GetOpDesc()->MutableOutputDesc(src_index_);
}
} // namespace hybrid
} // namespace ge

+ 1
- 2
ge/hybrid/executor/node_state.h View File

@@ -35,7 +35,6 @@ class ShapeFuture {
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context);
~ShapeFuture() = default;
Status Get(GeShape &ori_shape, GeShape &shape);
GeTensorDescPtr GetTensorDesc();

private:
NodePtr src_node_;
@@ -46,7 +45,7 @@ class ShapeFuture {
struct ShapeInferenceState {
explicit ShapeInferenceState(const NodeItem &node_item);

Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc);
Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape);

void UpdateInputShapeFuture(int idx, ShapeFuture &&future);



+ 8
- 1
ge/hybrid/executor/subgraph_executor.cc View File

@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
GE_CHECK_NOTNULL(tensor_desc);
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node);
GE_CHECK_NOTNULL(node_state);
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc);
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape());
}
}

@@ -268,6 +268,13 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
} else {
node_state.SetKernelTask(node_item.kernel_task);
}

GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node),
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str());
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str());
return SUCCESS;
}



+ 12
- 16
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -20,9 +20,12 @@
#include "graph/utils/tensor_adapter.h"
#include "graph/debug/ge_attr_define.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor//worker//shape_inference_engine.h"
#include "common/dump/dump_manager.h"
#include "common/dump/dump_op.h"
#include "common/types.h"
#include "common/ge_types.h"
#include "common/profiling/profiling_manager.h"
#include "runtime/base.h"

namespace ge {
namespace hybrid {
@@ -151,19 +154,18 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(model);

// only report aicpu and aicore node
bool is_profiling_report = context_->GetNodeItem().is_profiling_report;
if (!is_profiling_report) {
GELOGD("Node[%s] is not aicore or aicpu, and no need to report data.", node->GetName().c_str());
return SUCCESS;
}

GELOGD("GetTaskDescInfo of node [%s] start.", node->GetName().c_str());
auto op_desc = node->GetOpDesc();
std::string op_name = op_desc->GetName();
std::string dynamic_model_name = model->GetModelName();
uint32_t task_id = context_->GetTaskId();
uint32_t stream_id = context_->GetStreamId();

uint32_t task_id = 0;
uint32_t stream_id = 0;
if (rtGetTaskIdAndStreamID(&task_id, &stream_id) != RT_ERROR_NONE) {
GELOGE(PARAM_INVALID, "Get task_id and stream_id failed.");
return PARAM_INVALID;
}

TaskDescInfo tmp_task_desc_info;
tmp_task_desc_info.model_name = dynamic_model_name;
tmp_task_desc_info.op_name = op_name;
@@ -175,8 +177,6 @@ Status NodeDoneCallback::GetTaskDescInfo(const NodePtr node, const HybridModel *
}
tmp_task_desc_info.task_id = task_id;
tmp_task_desc_info.stream_id = stream_id;
tmp_task_desc_info.shape_type = "dynamic";
tmp_task_desc_info.cur_iter_num = graph_context_->iteration;
GELOGD("GetTaskDescInfo of node [%s] end, task_id[%u], stream_id[%u]",
node->GetName().c_str(), task_id, stream_id);
task_desc_info.emplace_back(tmp_task_desc_info);
@@ -348,10 +348,6 @@ Status NodeDoneCallback::OnNodeDone() {
}

GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item));
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) {
// update output tensor sizes
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item));
}
// PropagateOutputs for type == DEPEND_COMPUTE
if (node_item.shape_inference_type == DEPEND_COMPUTE) {
if (graph_context_->trace_enabled) {


+ 18
- 103
ge/hybrid/executor/worker/shape_inference_engine.cc View File

@@ -17,15 +17,9 @@
#include "hybrid/executor/worker/shape_inference_engine.h"
#include "graph/shape_refiner.h"
#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "common/math/math_util.h"
#include "hybrid/node_executor/node_executor.h"

namespace ge {
namespace {
const int kAlignment = 32;
}
namespace hybrid {
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context)
: execution_context_(execution_context),
@@ -46,9 +40,7 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
}

if (node_item.fused_subgraph != nullptr) {
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph));
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item));
return SUCCESS;
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph);
}

// Skip shape inference for node of type DEPEND_COMPUTE
@@ -71,15 +63,21 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) {
std::lock_guard<std::mutex> lk(mu_);
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start");
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true),
"Invoke InferShapeAndType failed.");
"Invoke InferShapeAndType failed.");
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End");
}
// Check again to make sure shape is valid after shape inference
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) {
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape),
"Failed to get shape status. node = %s",
node_item.NodeName().c_str());

// update output tensor sizes after shape inference
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start");
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE));
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End");
GE_CHK_BOOL_RET_STATUS(!is_unknown_shape,
INTERNAL_ERROR,
"[%s] Shape is still unknown after shape inference.",
node_item.NodeName().c_str());
}

GELOGD("[%s] [HybridTrace] After shape inference. Node = %s",
node_item.NodeName().c_str(),
@@ -129,6 +127,8 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
// propagate each output
for (int i = 0; i < node_item.num_outputs; ++i) {
auto output_desc = node_item.op_desc->MutableOutputDesc(i);
const auto &shape = output_desc->MutableShape();
const auto &ori_shape = output_desc->GetOriginShape();
auto &output_nodes = node_item.outputs[i];

// propagate output to all sub-inputs
@@ -149,7 +149,9 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) {
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first,
std::move(future));
} else {
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc));
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first,
ori_shape,
shape));
}
}
}
@@ -228,92 +230,5 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) {
}
return SUCCESS;
}

Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
std::vector<int64_t> &shape,
bool fallback_with_range) {
const auto &tensor_shape = tensor_desc.MutableShape();
if (tensor_shape.IsUnknownShape()) {
if (!fallback_with_range) {
GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
tensor_shape.ToString().c_str());
return INTERNAL_ERROR;
}

GELOGD("Calc output size by range");
std::vector<std::pair<int64_t, int64_t>> shape_range;
GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(shape_range), "Failed to get shape range");
if (shape_range.size() != shape.size()) {
GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
shape_range.size(),
shape.size());
return INTERNAL_ERROR;
}

for (size_t dim_index = 0; dim_index < shape.size(); ++dim_index) {
if (shape[dim_index] == ge::UNKNOWN_DIM) {
shape[dim_index] = shape_range[dim_index].second;
}
}

GELOGD("After canonicalization, shape = [%s], before = [%s]",
GeShape(shape).ToString().c_str(),
tensor_shape.ToString().c_str());
}

return SUCCESS;
}

Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
const std::vector<int64_t> &shape,
int64_t &tensor_size) {
GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());
uint32_t type_size;
if (!TypeUtils::GetDataTypeLength(data_type, type_size)) {
GELOGE(INTERNAL_ERROR, "Failed to get data type size");
return INTERNAL_ERROR;
}

tensor_size = type_size;
for (const auto &dim : shape) {
GE_CHECK_GE(dim, 0);
GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
"Shape size overflow, shape = [%s]",
GeShape(shape).ToString().c_str());
tensor_size *= dim;
}

GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
"Tensor size is too large: %ld, shape = [%s]",
tensor_size,
GeShape(shape).ToString().c_str());
tensor_size = (tensor_size + kAlignment - 1) / kAlignment * kAlignment;
return SUCCESS;
}

Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
auto op_desc = node_item.GetOpDesc();
for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
auto tensor_desc = op_desc->MutableOutputDesc(output_index);
GE_CHECK_NOTNULL(tensor_desc);
const auto &shape = tensor_desc->MutableShape();
// modify on copy
auto dims = shape.GetDims();
GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
"[%s] Failed to canonicalize shape for output %zu",
node_item.NodeName().c_str(),
output_index);

int64_t tensor_size;
GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
"[%s] Failed to calc tensor size for output %zu",
node_item.NodeName().c_str(),
output_index);
GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
(void) TensorUtils::SetSize(*tensor_desc, tensor_size);
}

return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 0
- 4
ge/hybrid/executor/worker/shape_inference_engine.h View File

@@ -34,11 +34,7 @@ class ShapeInferenceEngine {

Status PropagateOutputShapes(const NodeItem &node_item);

static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false);

private:
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range);
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size);
static Status UpdatePeerNodeShape(const Node &node);
Status AwaitDependentNodes(NodeState &node_state);



+ 27
- 4
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -21,6 +21,7 @@
#include "graph/build/memory/var_mem_assign_util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/model_manager.h"
#include "graph/manager/graph_var_manager.h"
#include "graph/manager/host_mem_manager.h"
#include "graph/manager/trans_var_data_utils.h"
@@ -226,10 +227,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
new_node->node_id = node_index;
new_node->op_desc->SetId(node_index);
node_index += 1;
NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_CUSTOM);

*node_item = new_node.get();
node_items[node] = std::move(new_node);
return SUCCESS;
@@ -924,6 +922,7 @@ Status HybridModelBuilder::InitWeights() {
}

Status HybridModelBuilder::LoadTasks() {
GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
for (auto &it : hybrid_model_.node_items_) {
auto &node_item = it.second;
auto &node_ptr = node_item->node;
@@ -1560,5 +1559,29 @@ Status HybridModelBuilder::BuildInputMapping(GraphItem &graph_item,

return SUCCESS;
}

Status HybridModelBuilder::CheckAicpuOpList() {
std::vector<std::string> aicpu_optype_list;
std::vector<std::string> aicpu_tf_optype_list;
std::set<std::string> aicpu_optype_set;
std::set<std::string> aicpu_tf_optype_set;
for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
auto &ge_model = it.second;
GE_CHECK_NOTNULL(ge_model);
if (ge::AttrUtils::GetListStr(*ge_model, "needCheckCpu", aicpu_optype_list)) {
aicpu_optype_set.insert(aicpu_optype_list.begin(), aicpu_optype_list.end());
}

if (ge::AttrUtils::GetListStr(*ge_model, "needCheckTf", aicpu_tf_optype_list)) {
aicpu_tf_optype_set.insert(aicpu_tf_optype_list.begin(), aicpu_tf_optype_list.end());
}
}
// reset list with set
aicpu_optype_list.assign(aicpu_optype_set.begin(), aicpu_optype_set.end());
aicpu_tf_optype_list.assign(aicpu_tf_optype_set.begin(), aicpu_tf_optype_set.end());
GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),
"Launch check aicpu op type failed.");
return SUCCESS;
}
} // namespace hybrid
} // namespace ge

+ 1
- 0
ge/hybrid/model/hybrid_model_builder.h View File

@@ -78,6 +78,7 @@ class HybridModelBuilder {
Status ParseVarOutputs(NodeItem &node_item);
Status LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem *parent_node_item);
Status RecoverGraphUnknownFlag();
Status CheckAicpuOpList();

const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str();


+ 34
- 57
ge/hybrid/model/node_item.cc View File

@@ -22,7 +22,6 @@
#include "graph/debug/ge_attr_define.h"
#include "graph/utils/node_utils.h"
#include "hybrid/node_executor/node_executor.h"
#include "hybrid/executor/worker/shape_inference_engine.h"

namespace ge {
namespace hybrid {
@@ -48,7 +47,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr
GE_CHECK_NOTNULL(dst_op_desc);
auto in_idx = node_and_anchor.second->GetIdx();
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx);
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc);
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc);
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx);
}

@@ -65,7 +64,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap
return FAILED;
}

fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc);
fused_subgraph.output_mapping.emplace(parent_index, op_desc);
return SUCCESS;
}

@@ -127,7 +126,12 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite
return SUCCESS;
}

void NodeItem::ResolveOptionalInputs() {
Status NodeItem::Init() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());

if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) {
has_optional_inputs = true;
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
@@ -139,18 +143,7 @@ void NodeItem::ResolveOptionalInputs() {
}
}
}
}

Status NodeItem::InitInputsAndOutputs() {
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);
num_inputs = static_cast<int>(op_desc->GetInputsSize());
num_outputs = static_cast<int>(op_desc->GetOutputsSize());
ResolveOptionalInputs();
return SUCCESS;
}

Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
@@ -158,54 +151,38 @@ Status NodeItem::ResolveDynamicState() {
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
return SUCCESS;
}

Status NodeItem::ResolveStaticInputsAndOutputs() {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
if (is_dynamic) {
for (int i = 0; i < num_inputs; ++i) {
const auto &input_desc = MutableInputDesc(i);
GE_CHECK_NOTNULL(input_desc);
if (input_desc->MutableShape().IsUnknownShape()) {
is_input_shape_static_.push_back(false);
} else {
num_static_input_shapes++;
is_input_shape_static_.push_back(true);
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]",
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str());
}
}
}

for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
for (int i = 0; i < num_outputs; ++i) {
const auto &output_desc = op_desc->MutableOutputDesc(i);
GE_CHECK_NOTNULL(output_desc);
if (output_desc->MutableShape().IsUnknownShape()) {
is_output_shape_static = false;
break;
}
}
}

if (is_output_shape_static) {
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this));
}
return SUCCESS;
}

void NodeItem::ResolveUnknownShapeType() {
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}
}
if (IsControlOp() || node_type == PARTITIONEDCALL) {
shape_inference_type = DEPEND_COMPUTE;
} else {
int32_t unknown_shape_type_val = 0;
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val);
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val);
}

Status NodeItem::Init() {
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs());
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState());
if (is_dynamic) {
ResolveUnknownShapeType();
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs());
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str());
}



+ 0
- 6
ge/hybrid/model/node_item.h View File

@@ -99,16 +99,10 @@ struct NodeItem {
std::map<int, int> reuse_inputs;
std::map<int, int> reuse_outputs;
int num_static_input_shapes = 0;
bool is_profiling_report = false;

private:
explicit NodeItem(NodePtr node);
Status Init();
Status InitInputsAndOutputs();
void ResolveOptionalInputs();
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;


+ 0
- 10
ge/hybrid/node_executor/aicore/aicore_node_executor.cc View File

@@ -165,16 +165,6 @@ Status AiCoreNodeTask::ExecuteAsync(TaskContext &context, std::function<void()>
}
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] Start");
GE_CHK_STATUS_RET_NOLOG((*it)->LaunchKernel(context.GetStream()));
uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCore node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[AiCoreNodeLaunchKernel] End");
}


+ 0
- 11
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -189,17 +189,6 @@ Status AicpuNodeTaskBase::ExecuteAsync(TaskContext &context, std::function<void(

GE_CHK_STATUS_RET(LaunchTask(context));

uint32_t task_id = 0;
uint32_t stream_id = 0;
rtError_t rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Get task_id and stream_id failed.");
return rt_ret;
}
context.SetTaskId(task_id);
context.SetStreamId(stream_id);
GELOGD("AiCpu node[%s] task_id: %u, stream_id: %u.", context.GetNodeName(), task_id, stream_id);

auto callback = [=, &context]() {
GELOGD("Node[%s] callback start.", node_name_.c_str());
RECORD_CALLBACK_EVENT(context.GetExecutionContext(), node_name_.c_str(), "[TaskCallback] Start");


+ 14
- 2
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -123,11 +123,22 @@ Status KnownNodeTask::Init(TaskContext &context) {
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
}
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
if (dump_properties.IsDumpOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
}
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model_->SetDeviceId(device_id);
GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed.");
load_flag_ = true;
} else {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
davinci_model_->Id()), "KnownNodeTask::Init destroy aicpu kernel failed.");
davinci_model_->Id(), davinci_model_->SubModelId()), "KnownNodeTask::Init destroy aicpu kernel failed.");
}
GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName());
return SUCCESS;
@@ -161,8 +172,9 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node

// set known node flag as true
davinci_model->SetKnownNode(true);
davinci_model->SetId(model.GetModelId());
// set model id as root node's node id
davinci_model->SetId(node->GetOpDesc()->GetId());
davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
GELOGD("KnownNodeExecutor::LoadTask node id %ld.", node->GetOpDesc()->GetId());

GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed.");


+ 0
- 38
ge/hybrid/node_executor/task_context.cc View File

@@ -148,10 +148,6 @@ Status TaskContext::AllocateWorkspaces() {
}

Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const {
if (callback_fun == nullptr) {
GELOGW("[%s] Callback is NULL", GetNodeName());
return SUCCESS;
}
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun);
if (ret != SUCCESS) {
GELOGE(ret, "[%s] Failed to register callback", GetNodeName());
@@ -319,22 +315,6 @@ void TaskContext::SetStatus(Status status) {
}
}

uint32_t TaskContext::GetTaskId() const {
return task_id_;
}

void TaskContext::SetTaskId(uint32_t task_id) {
task_id_ = task_id;
}

uint32_t TaskContext::GetStreamId() const {
return stream_id_;
}

void TaskContext::SetStreamId(uint32_t stream_id) {
stream_id_ = stream_id;
}

Status TaskContext::AllocateWorkspace(size_t size, void **buffer, void *ori_addr) {
GE_CHECK_NOTNULL(buffer);
if (ori_addr == nullptr) {
@@ -404,20 +384,6 @@ const char *TaskContext::GetNodeName() const {
return node_item_->NodeName().c_str();
}

void TaskContext::ReleaseInputsAndOutputs() {
for (int i = 0; i < node_item_->num_inputs; ++i) {
auto tensor = inputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i);
}

for (int i = 0; i < node_item_->num_outputs; ++i) {
auto tensor = outputs_start_ + i;
tensor->Destroy();
GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i);
}
}

void TaskContext::ReleaseInput(int index) {
auto input_tensor = MutableInput(index);
if (input_tensor != nullptr) {
@@ -490,9 +456,5 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con
const DumpProperties &TaskContext::GetDumpProperties() const {
return execution_context_->dump_properties;
}

bool TaskContext::NeedCallback() {
return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0;
}
} // namespace hybrid
} // namespace ge

+ 0
- 10
ge/hybrid/node_executor/task_context.h View File

@@ -50,8 +50,6 @@ class TaskContext {
ConstGeTensorDescPtr GetOutputDesc(int index) const;
GeTensorDescPtr MutableInputDesc(int index) const;
GeTensorDescPtr MutableOutputDesc(int index) const;
void ReleaseInputsAndOutputs();
bool NeedCallback();
void ReleaseInput(int index);
const TensorValue *GetInput(int index) const;
const TensorValue *GetOutput(int index) const;
@@ -96,12 +94,6 @@ class TaskContext {

void SetStatus(Status status);

uint32_t GetTaskId() const;
void SetTaskId(uint32_t task_id);

uint32_t GetStreamId() const;
void SetStreamId(uint32_t stream_id);

bool IsForceInferShape() const;
void SetForceInferShape(bool force_infer_shape);
void *handle_ = nullptr;
@@ -123,8 +115,6 @@ class TaskContext {
Status status_ = SUCCESS;
std::vector<void *> workspaces_;
uint64_t iteration_ = 0;
uint32_t task_id_= 0;
uint32_t stream_id_ = 0;
};
} // namespace hybrid
} // namespace ge


+ 15
- 4
ge/ir_build/atc_ir_common.cc View File

@@ -51,6 +51,7 @@ const char *const kDigitError = "is not digit";
const char *const kCompressWeightError = "it must be appointed when appoint parameter[--optypelist_for_implmode]";
const char *const kSelectImplmodeError = "only support high_performance, high_precision";
const char *const kDynamicBatchSizeError = "It can only contains digit, \",\", \" \"";
const char *const kKeepDtypeError = "file not found";

vector<string> SplitInputShape(const std::string &input_shape) {
vector<string> shape_pair_vec;
@@ -63,19 +64,18 @@ vector<string> SplitInputShape(const std::string &input_shape) {
}
} // namespace

Status CheckInputFormat(const string &input_format) {
Status CheckInputFormat(const std::string &input_format) {
if (input_format.empty()) {
return ge::SUCCESS;
}
if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"});
GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str());
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format not found"});
GELOGE(ge::PARAM_INVALID, "user input format [%s] is not found!", input_format.c_str());
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
}

bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map,
std::string &dynamic_batch_size) {
int32_t size = 0;
@@ -439,6 +439,17 @@ Status CheckCompressWeightParamValid(const std::string enable_compress_weight, c
return ge::SUCCESS;
}

// Validates the --keep_dtype option: an empty value is accepted (feature off);
// a non-empty value must name a readable config file, otherwise an ATC error
// is reported and PARAM_INVALID is returned.
Status CheckKeepTypeParamValid(const std::string &keep_dtype) {
  if (keep_dtype.empty()) {
    return ge::SUCCESS;
  }
  if (CheckInputPathValid(keep_dtype, "--keep_dtype")) {
    return ge::SUCCESS;
  }
  ErrorManager::GetInstance().ATCReportErrMessage(
      "E10001", {"parameter", "value", "reason"}, {"--keep_dtype", keep_dtype, kKeepDtypeError});
  GELOGE(ge::PARAM_INVALID, "keep dtype config file not found, file_name:%s", keep_dtype.c_str());
  return ge::PARAM_INVALID;
}

int CheckLogParamValidAndSetLogLevel(const std::string log) {
int ret = -1;
if (log == "default") {


+ 2
- 1
ge/ir_build/atc_ir_common.h View File

@@ -75,7 +75,8 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf);
Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory);
Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream);
Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode);
Status CheckInputFormat(const string &input_format);
Status CheckInputFormat(const std::string &input_format);
Status CheckKeepTypeParamValid(const std::string &keep_dtype);
void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips);
void EraseEndSemicolon(std::string &param);
}


+ 0
- 36
ge/ir_build/ge_ir_build.cc View File

@@ -581,42 +581,6 @@ graphStatus aclgrphGetIRVersion(int *major_version, int *minor_version, int *pat
return GRAPH_SUCCESS;
}

// Runs format and shape/type inference over a user-provided root graph.
// Steps: reject subgraphs, topologically sort, infer origin formats, then
// infer shape/type node by node. Nodes without an infershape function are
// skipped with a warning; any other failure aborts.
// Fix: the per-node status used to shadow the outer `ret`, which is
// warning-prone (-Wshadow) and easy to misread — renamed to `infer_ret`.
graphStatus aclgrphInferShapeAndType(ge::Graph &graph) {
  auto compute_graph = GraphUtils::GetComputeGraph(graph);
  GE_CHECK_NOTNULL(compute_graph);

  // Only a root graph is a valid input; a graph with a parent is a subgraph.
  auto root_graph = compute_graph->GetParentGraph();
  if (root_graph != nullptr) {
    GELOGE(GRAPH_PARAM_INVALID, "Input param should not be subgraph");
    return GRAPH_PARAM_INVALID;
  }

  auto ret = compute_graph->TopologicalSorting();
  if (ret != GRAPH_SUCCESS) {
    GELOGE(ret, "Acl topo logical sort failed.");
    return ret;
  }

  ret = compute_graph->InferOriginFormat();
  if (ret != GRAPH_SUCCESS) {
    GELOGE(ret, "Acl InferOriginFormat failed.");
    return ret;
  }

  for (auto &node : compute_graph->GetAllNodes()) {
    graphStatus infer_ret = ShapeRefiner::InferShapeAndType(node);
    if (infer_ret == GRAPH_PARAM_INVALID) {
      // No registered infershape function for this op type: best-effort skip.
      GELOGW("Can not find infershape func.");
      continue;
    } else if (infer_ret != GRAPH_SUCCESS) {
      GELOGE(infer_ret, "Acl infershape failed.");
      return infer_ret;
    }
  }

  return GRAPH_SUCCESS;
}

graphStatus aclgrphDumpGraph(const ge::Graph &graph, const char *file, const size_t len) {
GE_CHECK_NOTNULL(file);



+ 1
- 0
ge/offline/CMakeLists.txt View File

@@ -10,6 +10,7 @@ protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
set(SRC_LIST
"main.cc"
"single_op_parser.cc"
"keep_dtype_option.cc"
"../session/omg.cc"
"../ir_build/atc_ir_common.cc"
)


+ 116
- 0
ge/offline/keep_dtype_option.cc View File

@@ -0,0 +1,116 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "keep_dtype_option.h"
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include "graph/debug/ge_attr_define.h"
#include "framework/common/util.h"
#include "common/util/error_manager/error_manager.h"
namespace ge {
namespace {
const size_t kMaxOpsNum = 10;
} // namespace
// Returns true iff |op_name| appears in the op's recorded original op names
// (ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES); false when the attribute is absent.
bool IsOriginalOpFind(OpDescPtr &op_desc, const std::string &op_name) {
  std::vector<std::string> origin_names;
  const bool has_attr = AttrUtils::GetListStr(op_desc, ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, origin_names);
  if (!has_attr) {
    return false;
  }

  for (size_t i = 0; i < origin_names.size(); ++i) {
    if (origin_names[i] == op_name) {
      return true;
    }
  }
  return false;
}
// Builds a single error message listing the op names from the keep_dtype
// config file that matched no node in the graph (truncated after kMaxOpsNum
// entries), then reports it via ErrorManager and the error log.
void KeepDtypeReportError(const std::vector<std::string> &invalid_list) {
  const size_t count = invalid_list.size();
  std::stringstream msg;
  msg << "config file contains " << count;
  msg << (count == 1 ? " operator not in the graph, op name:"
                     : " operators not in the graph, op names:");
  for (size_t idx = 0; idx < count; idx++) {
    if (idx == kMaxOpsNum) {
      // Cap the listing so the message stays readable.
      msg << "..";
      break;
    }
    msg << invalid_list[idx];
    if (idx + 1 != count) {
      msg << " ";
    }
  }

  ErrorManager::GetInstance().ATCReportErrMessage(
      "E10042", {"parameter", "reason"}, {"keep_dtype", msg.str().c_str()});
  GELOGE(FAILED, "%s", msg.str().c_str());
}
// Applies the --keep_dtype option: reads the config file (one op name per
// line) and sets ATTR_NAME_KEEP_DTYPE=1 on every node of |graph| whose name
// (or recorded original op name) matches. Names that match nothing are
// collected and reported as a PARAM_INVALID error.
// Fix: trim each line BEFORE the empty check — previously a whitespace-only
// line survived the check, trimmed to "", matched no node, and was reported
// as an invalid empty op name.
Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype) {
  GE_CHECK_NOTNULL(graph);
  if (keep_dtype.empty()) {
    return SUCCESS;
  }
  std::string real_path = RealPath(keep_dtype.c_str());
  if (real_path.empty()) {
    GELOGE(PARAM_INVALID, "Can not get real path for %s.", keep_dtype.c_str());
    return PARAM_INVALID;
  }
  std::ifstream ifs(real_path);
  if (!ifs.is_open()) {
    GELOGE(FAILED, "Open file %s failed", keep_dtype.c_str());
    return FAILED;
  }

  std::string op_name;
  std::vector<std::string> invalid_list;
  while (std::getline(ifs, op_name)) {
    // Trim first so whitespace-only lines are skipped, not flagged invalid.
    op_name = StringUtils::Trim(op_name);
    if (op_name.empty()) {
      continue;
    }
    bool is_find = false;
    for (auto &node_ptr : graph->GetDirectNode()) {
      auto op_desc = node_ptr->GetOpDesc();
      GE_CHECK_NOTNULL(op_desc);
      // A name may match several nodes (e.g. fused ops sharing an original
      // name); mark every match rather than stopping at the first.
      if ((op_desc->GetName() == op_name) || IsOriginalOpFind(op_desc, op_name)) {
        is_find = true;
        (void)AttrUtils::SetInt(op_desc, ATTR_NAME_KEEP_DTYPE, 1);
      }
    }
    if (!is_find) {
      invalid_list.push_back(op_name);
    }
  }
  ifs.close();

  if (!invalid_list.empty()) {
    KeepDtypeReportError(invalid_list);
    return PARAM_INVALID;
  }

  return SUCCESS;
}
} // namespace ge

+ 26
- 0
ge/offline/keep_dtype_option.h View File

@@ -0,0 +1,26 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef KEEP_DTYPE_OPTION_H_
#define KEEP_DTYPE_OPTION_H_
#include <string>
#include "graph/compute_graph.h"
#include "framework/common/ge_inner_error_codes.h"
namespace ge {
// Applies the ATC --keep_dtype option: |keep_dtype| is the path of a config
// file listing one op name per line; matching nodes in |graph| are tagged so
// their precision is kept during compilation. Empty |keep_dtype| is a no-op.
Status DealKeepDtypeOption(const ComputeGraphPtr &graph, const std::string &keep_dtype);
} // namespace ge
#endif // KEEP_DTYPE_OPTION_H_

+ 16
- 0
ge/offline/main.cc View File

@@ -43,6 +43,7 @@
#include "parser/common/register_tbe.h"
#include "register/op_registry.h"
#include "single_op_parser.h"
#include "keep_dtype_option.h"

using domi::BuildMode;
using domi::OpRegistrationData;
@@ -109,6 +110,9 @@ DEFINE_string(precision_mode, "force_fp16",
"Optional; precision mode."
"Support force_fp16, allow_mix_precision, allow_fp32_to_fp16, must_keep_origin_dtype.");

DEFINE_string(keep_dtype, "",
"Optional; config file to specify the precision used by the operator during compilation.");

DEFINE_string(input_format, "",
"Optional; input_format, format of input data, NCHW;NHWC."
"Format:\"NHWC\"");
@@ -285,6 +289,8 @@ class GFlagUtils {
"\n[Operator Tuning]\n"
" --precision_mode precision mode, support force_fp16(default), allow_mix_precision, "
"allow_fp32_to_fp16, must_keep_origin_dtype.\n"
" --keep_dtype Retains the precision of certain operators in inference "
"scenarios by using a configuration file.\n"
" --auto_tune_mode Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n"
" --op_select_implmode Set op select implmode. Support high_precision, high_performance. "
"default: high_performance\n"
@@ -421,6 +427,9 @@ class GFlagUtils {
FLAGS_enable_compress_weight, FLAGS_compress_weight_conf) == ge::SUCCESS,
ret = ge::FAILED, "check compress weight failed!");

GE_CHK_BOOL_EXEC(ge::CheckKeepTypeParamValid(FLAGS_keep_dtype) == ge::SUCCESS,
ret = ge::FAILED, "check keep dtype failed!");

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ge::CheckOutputPathValid(FLAGS_check_report, "--check_report"), ret = ge::FAILED,
"check_report file %s not found!!", FLAGS_check_report.c_str());
@@ -979,6 +988,13 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
}
}

Status ret = ge::DealKeepDtypeOption(ge::GraphUtils::GetComputeGraph(graph), FLAGS_keep_dtype);
if (ret != SUCCESS) {
(void)ge_generator.Finalize();
(void)ge::GELib::GetInstance()->Finalize();
return ret;
}

geRet = ge_generator.GenerateOfflineModel(graph, output, inputs);
if (geRet != ge::SUCCESS) {
DOMI_LOGE("GE GenerateOfflineModel execute failed");


+ 3
- 0
ge/offline/module.mk View File

@@ -11,6 +11,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \

@@ -64,6 +65,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \

@@ -117,6 +119,7 @@ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dg
LOCAL_SRC_FILES := \
main.cc \
single_op_parser.cc \
keep_dtype_option.cc \
../session/omg.cc \
../ir_build/atc_ir_common.cc \



+ 2
- 2
ge/omm/csa_interact.cc View File

@@ -78,8 +78,8 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) {
Status CsaInteract::WriteJobState(JobState job_state, JobSubState job_sub_state, uint32_t module_ret_errcode,
ErrorModule error_module) {
if (!is_init_) {
GELOGE(INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState");
return INTERNAL_ERROR;
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "CsaInteract has not init, can't WriteJobState");
return ACL_ERROR_GE_INTERNAL_ERROR;
}
if ((curr_state_ == JOBSTATE_FAILED) || (curr_state_ == JOBSTATE_KILLED)) {
return SUCCESS;


+ 8
- 0
ge/proto/caffe/caffe.proto View File

@@ -1,3 +1,11 @@
/**
* This file is part of Open Source Software caffe, version 1.0 https://github.com/BVLC/caffe
*
* This file is included by GraphEngine so as to support model format conversion from caffe model to GraphEngine model.
* This file in this distribution may have been modified by Huawei Technologies Co., Ltd ("Huawei Modifications").
* All Huawei Modifications are Copyright 2019-2020 Huawei Technologies Co., Ltd.
*/

syntax = "proto2";

package domi.caffe;


+ 2
- 0
ge/proto/dump_task.proto View File

@@ -108,4 +108,6 @@ message DumpData{
repeated OpOutput output = 3;
repeated OpInput input = 4;
repeated OpBuffer buffer = 5;
string op_name = 6;

}

+ 0
- 2
ge/proto/op_mapping_info.proto View File

@@ -15,7 +15,6 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}

message Input {
@@ -24,7 +23,6 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}

enum BufferType {


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save