Compare commits

...

97 Commits

Author SHA1 Message Date
  mindspore-ci-bot 0123e31c9b !1679 Add OptimizeAfterStage1 4 years ago
  wangzhengjun eaa26d45e9 add OptimizeAfterStage1 4 years ago
  mindspore-ci-bot 4606dc6fc6 !1525 bugfix for release memory 4 years ago
  wuweikang cebc89dd3b bugfix for release memory 4 years ago
  mindspore-ci-bot 0dab7aa75d !1517 reviewbot modification 4 years ago
  wuweikang e73fdf198b reviewbot modification 4 years ago
  mindspore-ci-bot da1a60bc02 !1512 multi-thread online infer 4 years ago
  wuweikang 76c0c3a371 multi-thread online infer 4 years ago
  mindspore-ci-bot 2d446b8def !1503 change model_name for dump 4 years ago
  wjm 48d7b6dc8b fix 4 years ago
  wjm daf8e56d25 fix 4 years ago
  wjm 46156bf04f fix dump 4 years ago
  mindspore-ci-bot ee67c45a2b !1487 Fix hccl control dependency 4 years ago
  mindspore-ci-bot 4b90851c68 !1484 remove unused func InsertMemcpyNode 4 years ago
  mindspore-ci-bot 2e8d863a1e !1482 ge static check 4 years ago
  mindspore-ci-bot 44415f12c8 !1492 modify single op dump bug in c77 4 years ago
  chuxing 36f2c837bf Fix hccl control dependency 4 years ago
  zhou_chao1993 f49599b6c5 modify single op dump bug 4 years ago
  mindspore-ci-bot 99e607c6d1 !1490 fix optional input bug 4 years ago
  wxl d5f56ad31c fix optional input bug 4 years ago
  lichun c73a3c7b46 fix sc check error 4 years ago
  zhou_chao1993 f971f512e3 static check modify 4 years ago
  mindspore-ci-bot 7f73eedb8a !1478 Don't reset -2 when there is aicore op. 4 years ago
  mindspore-ci-bot ed941d6d87 !1461 modify dump single op in c77 4 years ago
  mindspore-ci-bot 089b82e9bd !1469 modify dynamic shape dump in c77 4 years ago
  unknown e52c916f56 Don't reset -2 when there is aicore op. 4 years ago
  mindspore-ci-bot 4c8e5f73c6 !1476 Bugfix: Missing hccl execution dependency due to wrong attribute type of _parallel_group 4 years ago
  mindspore-ci-bot a4783ff468 !1460 Reduce weight memory usage & Remove redundant memcpy 4 years ago
  chuxing 19d1f804c7 Bugfix: keep hccl control dependency 4 years ago
  zhou_chao1993 c90cae1410 modify dynamic shape dump 4 years ago
  mindspore-ci-bot 4c0d85693a !1463 Save atomic kernel bin to model. 4 years ago
  unknown b48ecfe347 Save atomic kernel bin to model. 4 years ago
  mindspore-ci-bot d7b607dc83 !1464 fix aipp check 4 years ago
  zhou_chao1993 637bcc86d6 modify dump single op 4 years ago
  wangxiaotian22 30743e1e59 fix aipp check 4 years ago
  chuxing 24b2437361 Fix dump for known-shaped subgraph 4 years ago
  unknown 3ef3f54d94 Save atomic kernel bin to model. 4 years ago
  mindspore-ci-bot 34f09f4fc8 !1447 LinkToPotentialPrecedenceNode 4 years ago
  mindspore-ci-bot 73e7c53f8a !1448 Fix bug of const input index. 4 years ago
  mindspore-ci-bot 494fa061a8 !1444 modify dump content in c77 4 years ago
  mindspore-ci-bot aeec1cb08b !1446 modify set dump in c77 4 years ago
  unknown 960cc1fd64 Fix bug of const input index. 4 years ago
  lianghao 5f1e659fcd LinkToPotentialPrecedenceNode 4 years ago
  zhou_chao1993 b1822cc73c modify set dump in c77 4 years ago
  zhou_chao1993 4931c4fa1e modify dump content 4 years ago
  mindspore-ci-bot 9d6aaa117c !1419 Add GetOriginalType for support RefSwitch & RefMerge 4 years ago
  mindspore-ci-bot 0da36c04e4 !1421 fixed sc warning 4 years ago
  mindspore-ci-bot 2ac43d4033 !1430 fix 1951 ts 4g bug 4 years ago
  mindspore-ci-bot 2112a36e80 !1415 support unknown while subgraph 4 years ago
  wxl 68595a656a fix ts 4g memory bug 4 years ago
  李磊 890373c79c fixed reviewbot warning 4 years ago
  chenyemeng 7a40a575f7 Add GetOriginalType for support RefSwitch & RefMerge 4 years ago
  lichun 701b0d6c1b support unknown while subgraph 4 years ago
  mindspore-ci-bot da71533e55 !1345 fixed sc warning 4 years ago
  mindspore-ci-bot af83c480c5 !1388 Feature: Tiger online inference support 4 years ago
  zhaoxinxin c936821629 modified: metadef 4 years ago
  mindspore-ci-bot 971630a7d2 !1400 Bugfix: While loop failed to restore original input after execution 4 years ago
  mindspore-ci-bot 1735e1b1f3 !1402 l2 buffer for f1.3.0 4 years ago
  lichun 12cef9e9b9 support unknown while subgraph 4 years ago
  李磊 7516130c7e delete code 4 years ago
  yangwei 7ec6e4fe61 r13_l2 4 years ago
  mindspore-ci-bot 7ed03d0d0e !1398 fix import 4 years ago
  李磊 1d0359d1c6 fixed pclint warning 4 years ago
  李磊 e9868abe29 fixed sc warning by wangxiaotian 4 years ago
  李磊 4fe73f77bc fixed sc warning 4 years ago
  yangwei 59a3e2e0ff fix import 4 years ago
  chuxing 4a7f623b12 while loop failed to restore input desc 4 years ago
  zhaoxinxin 8e0634323d modified: ge/graph/passes/base_pass.h 4 years ago
  mindspore-ci-bot f19cd2fca9 !1386 Adding dependencies by parallel groups 4 years ago
  mindspore-ci-bot c691f2a7d7 !1385 Fix error of single_op memory free. 4 years ago
  mindspore-ci-bot e2f04ddabd !1375 bugfix for atomic_addr_clean_pass 4 years ago
  zhaoxinxin 50552c3631 modified: ge/graph/passes/base_pass.cc 4 years ago
  chuxing 167621141b hccl ops with same parallel group can not be execute parallelly 4 years ago
  unknown aad154cdf1 Fix error of single_op memory free. 4 years ago
  mindspore-ci-bot aead0be2d6 !1372 online_inference c77 4 years ago
  lianghao 2cf49ced1c online_inference c77 4 years ago
  mindspore-ci-bot b8621d9d0e !1370 bugfix for auto find fp 4 years ago
  y00500818 9d34427af9 bugfix for atomic_addr_clean_pass 4 years ago
  gengchao4@huawei.com 37c928ed29 bugfix for auto find fp 4 years ago
  mindspore-ci-bot 0901ca5581 !1337 Fix bug of single_op inferdepend. 4 years ago
  mindspore-ci-bot 1224cdee8a !1306 dump for unknownshape 4 years ago
  mindspore-ci-bot 56007bea30 !1351 sync runtime head 4 years ago
  mindspore-ci-bot 5367bbe395 !1316 fixed compiled issue for proto files 4 years ago
  李磊 f0d897b0bb fixed compiled issue for proto files 4 years ago
  zhou_chao1993 13ecbe405a sync runtime head 4 years ago
  mindspore-ci-bot 3050d3984a !1307 fix bug of dynamic shape load error 4 years ago
  mindspore-ci-bot 1ccd0dd9ee !1341 modify profiing reporter data max len 4 years ago
  zhengyuanhua 97d93adaa5 modify profiling reporter data max len 4 years ago
  unknown 77d5468cf6 Fix bug of single_op inferdepend. 4 years ago
  wxl a89113e743 fix bug of dynamic shape load error 4 years ago
  mindspore-ci-bot e3fbf4d860 !1277 offline dynamic shape inference support 4 years ago
  lichun e6d3c77e80 offline dynamic shape inference support 4 years ago
  mindspore-ci-bot 81ac111f09 !1259 Unique LabelGoto args addr 4 years ago
  zhangxiaokun 6e874e8b87 Unique LabelGoto args addr 4 years ago
  mindspore-ci-bot 4d6e7acc14 !1252 update submodule 4 years ago
  wqtshg 67bdf03f4b update submodule 4 years ago
  wjm 3401ca857c dump for unknownshape 4 years ago
100 changed files with 2112 additions and 814 deletions
  1. .gitmodules (+2 -2)
  2. build.sh (+1 -1)
  3. ge/CMakeLists.txt (+7 -1)
  4. ge/analyzer/analyzer.cc (+7 -2)
  5. ge/common/CMakeLists.txt (+3 -2)
  6. ge/common/dump/dump_manager.cc (+1 -1)
  7. ge/common/dump/dump_op.cc (+36 -7)
  8. ge/common/dump/dump_op.h (+3 -1)
  9. ge/common/dump/dump_properties.cc (+8 -7)
  10. ge/common/dump/opdebug_register.cc (+0 -5)
  11. ge/common/formats/format_transfers/datatype_transfer.cc (+2 -1)
  12. ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc (+2 -1)
  13. ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc (+4 -2)
  14. ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc (+4 -2)
  15. ge/common/formats/format_transfers/format_transfer_fractal_nz.cc (+20 -10)
  16. ge/common/formats/format_transfers/format_transfer_fractal_z.cc (+17 -9)
  17. ge/common/formats/format_transfers/format_transfer_fractal_zz.cc (+20 -10)
  18. ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc (+3 -2)
  19. ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc (+6 -4)
  20. ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc (+7 -5)
  21. ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc (+2 -1)
  22. ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc (+2 -1)
  23. ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc (+4 -2)
  24. ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc (+10 -5)
  25. ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc (+2 -1)
  26. ge/common/helper/model_helper.cc (+7 -5)
  27. ge/common/profiling/profiling_manager.cc (+1 -1)
  28. ge/common/tbe_kernel_store.cc (+11 -0)
  29. ge/executor/CMakeLists.txt (+3 -2)
  30. ge/executor/ge_executor.cc (+39 -0)
  31. ge/ge_local_engine/CMakeLists.txt (+8 -6)
  32. ge/generator/ge_generator.cc (+86 -24)
  33. ge/graph/build/graph_builder.cc (+0 -55)
  34. ge/graph/build/memory/block_mem_assigner.cc (+93 -71)
  35. ge/graph/build/memory/graph_mem_assigner.cc (+3 -3)
  36. ge/graph/build/model_builder.cc (+46 -0)
  37. ge/graph/build/model_builder.h (+2 -0)
  38. ge/graph/build/task_generator.cc (+6 -2)
  39. ge/graph/execute/graph_execute.cc (+77 -3)
  40. ge/graph/execute/graph_execute.h (+8 -2)
  41. ge/graph/load/graph_loader.cc (+0 -1)
  42. ge/graph/load/model_manager/data_inputer.h (+2 -0)
  43. ge/graph/load/model_manager/davinci_model.cc (+96 -35)
  44. ge/graph/load/model_manager/davinci_model.h (+29 -4)
  45. ge/graph/load/model_manager/model_manager.cc (+37 -19)
  46. ge/graph/load/model_manager/model_manager.h (+3 -2)
  47. ge/graph/load/model_manager/model_utils.cc (+2 -1)
  48. ge/graph/load/model_manager/task_info/end_graph_task_info.cc (+1 -4)
  49. ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc (+1 -2)
  50. ge/graph/load/model_manager/task_info/kernel_task_info.cc (+74 -49)
  51. ge/graph/load/model_manager/task_info/kernel_task_info.h (+5 -0)
  52. ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc (+4 -22)
  53. ge/graph/manager/graph_caching_allocator.cc (+8 -0)
  54. ge/graph/manager/graph_caching_allocator.h (+7 -0)
  55. ge/graph/manager/graph_manager.cc (+359 -147)
  56. ge/graph/manager/graph_manager.h (+42 -0)
  57. ge/graph/manager/graph_manager_utils.cc (+9 -0)
  58. ge/graph/manager/graph_manager_utils.h (+19 -0)
  59. ge/graph/optimize/graph_optimize.cc (+40 -2)
  60. ge/graph/optimize/graph_optimize.h (+3 -0)
  61. ge/graph/passes/atomic_addr_clean_pass.cc (+39 -6)
  62. ge/graph/passes/atomic_addr_clean_pass.h (+8 -0)
  63. ge/graph/passes/attach_stream_label_pass.cc (+0 -1)
  64. ge/graph/passes/base_pass.cc (+61 -42)
  65. ge/graph/passes/base_pass.h (+12 -0)
  66. ge/graph/passes/infershape_pass.cc (+15 -0)
  67. ge/graph/passes/merge_input_memcpy_pass.cc (+3 -2)
  68. ge/graph/passes/merge_to_stream_merge_pass.cc (+3 -1)
  69. ge/graph/passes/next_iteration_pass.cc (+4 -2)
  70. ge/graph/passes/pass_utils.cc (+7 -1)
  71. ge/graph/passes/pass_utils.h (+2 -0)
  72. ge/graph/passes/subexpression_migration_pass.cc (+1 -1)
  73. ge/graph/passes/switch_dead_branch_elimination.cc (+8 -2)
  74. ge/graph/passes/switch_to_stream_switch_pass.cc (+2 -0)
  75. ge/graph/preprocess/graph_preprocess.cc (+2 -2)
  76. ge/graph/preprocess/insert_op/ge_aipp_op.cc (+2 -1)
  77. ge/graph/preprocess/insert_op/util_insert_aipp_op.cc (+9 -5)
  78. ge/host_cpu_engine/CMakeLists.txt (+3 -2)
  79. ge/host_kernels/gather_v2_kernel.cc (+2 -1)
  80. ge/hybrid/executor/hybrid_execution_context.h (+2 -1)
  81. ge/hybrid/executor/hybrid_model_async_executor.cc (+11 -10)
  82. ge/hybrid/executor/hybrid_model_async_executor.h (+8 -3)
  83. ge/hybrid/executor/hybrid_model_executor.cc (+15 -1)
  84. ge/hybrid/executor/hybrid_model_pipeline_executor.cc (+10 -0)
  85. ge/hybrid/executor/node_state.cc (+6 -4)
  86. ge/hybrid/executor/subgraph_executor.cc (+2 -2)
  87. ge/hybrid/executor/worker/execution_engine.cc (+15 -16)
  88. ge/hybrid/hybrid_davinci_model.cc (+27 -5)
  89. ge/hybrid/hybrid_davinci_model.h (+7 -1)
  90. ge/hybrid/hybrid_davinci_model_stub.cc (+13 -1)
  91. ge/hybrid/model/hybrid_model.cc (+20 -0)
  92. ge/hybrid/model/hybrid_model.h (+11 -3)
  93. ge/hybrid/model/hybrid_model_builder.cc (+306 -82)
  94. ge/hybrid/model/hybrid_model_builder.h (+16 -4)
  95. ge/hybrid/model/node_item.cc (+48 -8)
  96. ge/hybrid/model/node_item.h (+12 -3)
  97. ge/hybrid/node_executor/aicore/aicore_op_task.cc (+43 -16)
  98. ge/hybrid/node_executor/aicore/aicore_op_task.h (+8 -0)
  99. ge/hybrid/node_executor/aicore/aicore_task_builder.cc (+1 -0)
  100. ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc (+44 -38)

.gitmodules (+2 -2)

@@ -1,8 +1,8 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
-branch = master
+branch = r1.3.0
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
-branch = master
+branch = r1.3.0

build.sh (+1 -1)

@@ -229,7 +229,7 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
rm -rf ${BASEPATH}/cov
mkdir ${BASEPATH}/cov
lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
+lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' '/usr/include/*' '*/metadef/*' '*/parser/*' -o cov/coverage.info
cd ${BASEPATH}/cov
genhtml coverage.info
fi


ge/CMakeLists.txt (+7 -1)

@@ -31,6 +31,7 @@ set(PROTO_HEADER_LIST
protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge PROTO_CLIENT_SRCS PROTO_CLIENT_HDRS ${PROTO_CLIENT_LIST})
protobuf_generate(ge PROTO_HEADER_SRCS PROTO_HEADER_HDRS ${PROTO_HEADER_LIST})
+protobuf_generate(ge_client PROTO_CLIENT_HEADER_SRCS PROTO_CLIENT_HEADER_HDRS ${PROTO_HEADER_LIST})

if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_proto_common.a ############
@@ -56,7 +57,7 @@ target_link_libraries(ge_proto_common PRIVATE

############ libge_proto_client.a ############
add_library(ge_proto_client STATIC
-${PROTO_HEADER_HDRS}
+${PROTO_CLIENT_HEADER_HDRS}
${PROTO_CLIENT_SRCS}
)

@@ -65,6 +66,11 @@ target_compile_definitions(ge_proto_client PRIVATE
google=ascend_private
)

+target_include_directories(ge_proto_client PRIVATE
+    ${CMAKE_BINARY_DIR}/proto/ge_client
+    ${CMAKE_BINARY_DIR}/proto/ge_client/proto
+)

target_compile_options(ge_proto_client PRIVATE
-O2
-fno-common


ge/analyzer/analyzer.cc (+7 -2)

@@ -221,7 +221,10 @@ ge::Status Analyzer::SaveAnalyzerDataToFile(uint64_t session_id, uint64_t graph_
try {
json_file_ << jsn.dump(kJsonDumpLevel) << std::endl;
} catch (nlohmann::detail::type_error &e) {
-GELOGE(FAILED, "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s], session_id:%lu, graph_id:%lu", json_file_name_.c_str(), e.what(), session_id, graph_id);
+GELOGE(FAILED,
+       "[Json.dump][GraphInfo]json.dump to analyze file [%s] failed because [%s],"
+       "session_id:%lu, graph_id:%lu",
+       json_file_name_.c_str(), e.what(), session_id, graph_id);
ret_failed = true;
}
json_file_.close();
@@ -241,7 +244,9 @@ ge::Status Analyzer::DoAnalyze(DataInfo &data_info) {
GE_CHECK_NOTNULL(graph_info);
auto status = SaveOpInfo(desc, data_info, graph_info);
if (status != SUCCESS) {
-GELOGE(status, "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!", desc->GetName().c_str(), desc->GetType().c_str());
+GELOGE(status,
+       "[Check][SaveOpInfo]save op info: desc_name [%s] desc_type [%s] failed!",
+       desc->GetName().c_str(), desc->GetType().c_str());
return FAILED;
}
// create json file


ge/common/CMakeLists.txt (+3 -2)

@@ -16,6 +16,7 @@ set(PROTO_LIST
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
+protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})

set(SRC_LIST
"context/ctx.cc"
@@ -127,7 +128,7 @@ target_link_libraries(ge_common PRIVATE
)

############ libge_common.a ############
-add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_HDRS})
+add_library(ge_common_static STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})
target_compile_definitions(ge_common_static PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
HOST_VISIBILITY
@@ -158,7 +159,7 @@ target_include_directories(ge_common_static PRIVATE
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
-${CMAKE_BINARY_DIR}/proto/ge
+${CMAKE_BINARY_DIR}/proto/ge_static
#### yellow zone ####
${GE_DEPEND_DIR}/inc
${GE_DEPEND_DIR}/inc/cce


ge/common/dump/dump_manager.cc (+1 -1)

@@ -96,7 +96,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
dump_mode = dump_config.dump_mode;
GELOGI("Dump mode is %s", dump_mode.c_str());
dump_properties.SetDumpMode(dump_mode);
-dump_properties_map_.emplace(kInferSessionId, dump_properties);
+dump_properties_map_[kInferSessionId] = dump_properties;

return SUCCESS;
}
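
The one-line fix above is easy to miss: std::map::emplace never overwrites an existing key, so a repeated SetDumpConf call left the stale properties in place, while operator[] assigns unconditionally. A minimal standalone sketch of the difference (not GE code; kInferSessionId here is just an illustrative constant):

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<uint64_t, std::string> props;
  const uint64_t kInferSessionId = 0;  // illustrative stand-in for GE's constant
  props.emplace(kInferSessionId, "first config");
  // emplace is a no-op when the key already exists, so a second
  // SetDumpConf-style call silently kept the stale entry:
  props.emplace(kInferSessionId, "second config");
  std::cout << props[kInferSessionId] << '\n';  // prints "first config"
  // operator[] assigns unconditionally, which is what the fix relies on:
  props[kInferSessionId] = "second config";
  std::cout << props[kInferSessionId] << '\n';  // prints "second config"
  return 0;
}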


ge/common/dump/dump_op.cc (+36 -7)

@@ -20,6 +20,7 @@
#include "common/ge/datatype_util.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "framework/common/types.h"
#include "graph/anchor.h"
#include "graph/ge_tensor.h"
#include "graph/op_desc.h"
@@ -55,8 +56,10 @@ void DumpOp::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond
loop_cond_ = reinterpret_cast<uintptr_t>(loop_cond);
}

void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id) {
void DumpOp::SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name,
uint32_t dynamic_model_id) {
dynamic_model_name_ = dynamic_model_name;
+  dynamic_om_name_ = dynamic_om_name;
dynamic_model_id_ = dynamic_model_id;
}

@@ -200,6 +203,32 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
return SUCCESS;
}

+Status DumpOp::SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info) {
+  if (dynamic_model_name_.empty() && dynamic_om_name_.empty()) {
+    GELOGI("Single op dump, no need set model name");
+    return SUCCESS;
+  }
+  std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
+  bool not_find_by_omname = model_list.find(dynamic_om_name_) == model_list.end();
+  bool not_find_by_modelname = model_list.find(dynamic_model_name_) == model_list.end();
+  std::string dump_model_name = not_find_by_omname ? dynamic_model_name_ : dynamic_om_name_;
+  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
+    if (not_find_by_omname && not_find_by_modelname) {
+      std::string model_list_str;
+      for (auto &model : model_list) {
+        model_list_str += "[" + model + "].";
+      }
+      GELOGW("Model %s will not be set to dump, dump list: %s", dump_model_name.c_str(), model_list_str.c_str());
+      return FAILED;
+    }
+  }
+  if (!dump_model_name.empty() && dump_properties_.IsDumpOpen()) {
+    GELOGD("Dump model name is %s", dump_model_name.c_str());
+    op_mapping_info.set_model_name(dump_model_name);
+  }
+  return SUCCESS;
+}

Status DumpOp::LaunchDumpOp() {
GELOGI("Start to launch dump op %s", op_desc_->GetName().c_str());
int32_t device_id = 0;
@@ -209,8 +238,7 @@ Status DumpOp::LaunchDumpOp() {
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
if (device_id < 0) {
-GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
-       "Check device_id failed, device_id = %d, which should be not less than 0.",
+GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Check device_id failed, device_id = %d, which should be not less than 0.",
device_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
@@ -220,11 +248,12 @@ Status DumpOp::LaunchDumpOp() {
op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
op_mapping_info.set_model_id(dynamic_model_id_);
-if (!dynamic_model_name_.empty() && dump_properties_.IsDumpOpen()) {
-  op_mapping_info.set_model_name(dynamic_model_name_);
+
+if (SetDumpModelName(op_mapping_info) != SUCCESS) {
+  return SUCCESS;
}
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
GELOGI("Dump step is %s ,dump path is %s in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());
uint32_t task_id = 0;
uint32_t stream_id = 0;
@@ -273,4 +302,4 @@ Status DumpOp::LaunchDumpOp() {
}
return SUCCESS;
}
-} // namesapce ge
+} // namespace ge
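
The new SetDumpModelName encodes a small precedence rule: prefer the om name when it appears in the configured dump list, otherwise fall back to the dynamic model name, and skip the dump entirely when neither matches and no DUMP_ALL_MODEL wildcard is set. A minimal sketch of just that rule (ResolveDumpModelName and kDumpAllModel are illustrative names, not GE API):

#include <iostream>
#include <set>
#include <string>

// "ALL_MODEL" stands in for GE's DUMP_ALL_MODEL wildcard entry.
const std::string kDumpAllModel = "ALL_MODEL";

// Returns the name to record in OpMappingInfo, or "" when the model is not
// configured for dumping (the real code then skips the dump launch).
std::string ResolveDumpModelName(const std::set<std::string> &dump_list,
                                 const std::string &model_name,
                                 const std::string &om_name) {
  const bool om_found = dump_list.count(om_name) != 0;
  const bool model_found = dump_list.count(model_name) != 0;
  if (dump_list.count(kDumpAllModel) == 0 && !om_found && !model_found) {
    return "";  // neither name is in the dump list and no wildcard is set
  }
  // Prefer the om name when it is the one that matched; otherwise fall
  // back to the dynamic model name.
  return om_found ? om_name : model_name;
}

int main() {
  std::set<std::string> dump_list = {"net.om"};
  std::cout << ResolveDumpModelName(dump_list, "net", "net.om") << '\n';    // "net.om"
  std::cout << ResolveDumpModelName(dump_list, "other", "other.om") << '\n';  // "" (dump skipped)
  return 0;
}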

ge/common/dump/dump_op.h (+3 -1)

@@ -34,12 +34,13 @@ class DumpOp {
vector<uintptr_t> output_addrs, rtStream_t stream);
Status LaunchDumpOp();
void SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_cond);
void SetDynamicModelInfo(const string &dynamic_model_name, uint32_t dynamic_model_id);
void SetDynamicModelInfo(const string &dynamic_model_name, const string &dynamic_om_name, uint32_t dynamic_model_id);

private:
Status ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info);
Status DumpOutput(aicpu::dump::Task &task);
Status DumpInput(aicpu::dump::Task &task);
+Status SetDumpModelName(aicpu::dump::OpMappingInfo &op_mapping_info);

DumpProperties dump_properties_;
OpDescPtr op_desc_;
@@ -54,6 +55,7 @@ class DumpOp {
uintptr_t loop_cond_;

std::string dynamic_model_name_;
+std::string dynamic_om_name_;
std::uint32_t dynamic_model_id_;
};
} // namespace ge


ge/common/dump/dump_properties.cc (+8 -7)

@@ -35,14 +35,14 @@ const std::string kDumpStatusOpen = "on";
const uint32_t kAicoreOverflow = (0x1 << 0);
const uint32_t kAtomicOverflow = (0x1 << 1);
const uint32_t kAllOverflow = (kAicoreOverflow | kAtomicOverflow);
-}
+} // namespace
namespace ge {
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties::DumpProperties(const DumpProperties &other) {
CopyFrom(other);
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY DumpProperties &DumpProperties::operator=(
const DumpProperties &other) {
const DumpProperties &other) {
CopyFrom(other);
return *this;
}
@@ -97,7 +97,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::InitByOpti

// The following is the new dump scenario of the fusion operator
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::AddPropertyValue(
const std::string &model, const std::set<std::string> &layers) {
const std::string &model, const std::set<std::string> &layers) {
for (const std::string &layer : layers) {
GELOGI("This model %s config to dump layer %s", model.c_str(), layer.c_str());
}
@@ -138,7 +138,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpProperties::GetPropertyValue(
const std::string &model) const {
const std::string &model) const {
auto iter = model_dump_properties_map_.find(model);
if (iter != model_dump_properties_map_.end()) {
return iter->second;
@@ -147,8 +147,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> DumpPrope
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool DumpProperties::IsLayerNeedDump(
const std::string &model, const std::string &om_name, const std::string &op_name) const {
const std::string &model, const std::string &om_name, const std::string &op_name) const {
// if dump all
GELOGD("model name is %s om name is %s op is %s in layer need dump", model.c_str(), om_name.c_str(), op_name.c_str());
if (model_dump_properties_map_.find(DUMP_ALL_MODEL) != model_dump_properties_map_.end()) {
return true;
}
@@ -203,7 +204,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY const std::string &DumpProperti
}

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void DumpProperties::SetDumpOpSwitch(
const std::string &dump_op_switch) {
const std::string &dump_op_switch) {
dump_op_switch_ = dump_op_switch;
}

@@ -270,4 +271,4 @@ void DumpProperties::SetDumpDebugOptions() {
GELOGI("ge.exec.enableDumpDebug is false or is not set.");
}
}
-} // namespace
+} // namespace ge

ge/common/dump/opdebug_register.cc (+0 -5)

@@ -80,13 +80,11 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de

uint32_t debug_stream_id = 0;
uint32_t debug_task_id = 0;
-#ifdef ONLY_COMPILE_OPEN_SRC
auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
-#endif
GELOGD("debug_task_id:%u, debug_stream_id:%u in stream overflow.", debug_task_id, debug_stream_id);
data_dumper.SaveOpDebugId(debug_task_id, debug_stream_id, p2p_debug_addr_, true);
return SUCCESS;
@@ -94,7 +92,6 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de

void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
rtError_t rt_ret = RT_ERROR_NONE;
-#ifdef ONLY_COMPILE_OPEN_SRC
if (stream != nullptr) {
GELOGD("start call rtDebugUnRegisterForStream in unknown shape over flow.");
rt_ret = rtDebugUnRegisterForStream(stream);
@@ -102,8 +99,6 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
GELOGW("rtDebugUnRegisterForStream failed, ret: 0x%X", rt_ret);
}
}
-#endif

if (op_debug_addr_ != nullptr) {
rt_ret = rtFree(op_debug_addr_);
if (rt_ret != RT_ERROR_NONE) {


ge/common/formats/format_transfers/datatype_transfer.cc (+2 -1)

@@ -154,7 +154,8 @@ Status DataTypeTransfer::TransDataType(const CastArgs &args, TransResult &result

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to alloc the memory for dst buf %zu, data size %zu", total_size, args.src_data_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}



ge/common/formats/format_transfers/format_transfer_c1hwncoc0_hwcn.cc (+2 -1)

@@ -73,7 +73,8 @@ Status CheckArgsForC1hwncoc0ToHwcn(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


ge/common/formats/format_transfers/format_transfer_dhwcn_fracz3D.cc (+4 -2)

@@ -94,7 +94,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -122,7 +123,8 @@ Status TransFormatDhwckToFz3D(const TransArgs &args, TransResult &result) {
args.data + src_idx * data_size, static_cast<size_t>(data_size));
}
if (ret != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
"Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
dst_offset, ret, pad_zero);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}


ge/common/formats/format_transfers/format_transfer_dhwnc_fracz3D_transpose.cc (+4 -2)

@@ -95,7 +95,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -123,7 +124,8 @@ Status TransFormatDhwncToFz3DTranspose(const TransArgs &args, TransResult &resul
args.data + src_idx * data_size, static_cast<size_t>(data_size));
}
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
dst_offset, ret, pad_zero);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}


ge/common/formats/format_transfers/format_transfer_fractal_nz.cc (+20 -10)

@@ -139,7 +139,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -175,7 +176,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -189,7 +191,8 @@ Status TransFormatFromNdToFracNz(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -210,7 +213,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -246,7 +250,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -260,7 +265,8 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con
ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -274,14 +280,16 @@ Status TransFormatFromFracNzToNd(const TransArgs &args, TransResult &result, con

Status FormatTransferFractalNz::TransFormat(const TransArgs &args, TransResult &result) {
if (!IsDataTypeSupport(args.src_data_type)) {
-GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+       "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
return ACL_ERROR_GE_DATATYPE_INVALID;
}
if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
-GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+       "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -325,7 +333,8 @@ Status FormatTransferFractalNz::TransShape(Format src_format, const ShapeVector

Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult &result) {
if (!IsDataTypeSupport(args.src_data_type)) {
-GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+       "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -333,7 +342,8 @@ Status FormatTransferFractalNzND::TransFormat(const TransArgs &args, TransResult
}

if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
+GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+       "Trans format from %s to %s, src shape %s, dst shape %s, data type %s is not supported",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());


ge/common/formats/format_transfers/format_transfer_fractal_z.cc (+17 -9)

@@ -127,7 +127,8 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
dst == nullptr,
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -173,8 +174,9 @@ Status TransFormatFromNchwToFz(const TransArgs &args, TransResult &result) {
}
}
if (ret != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d pad mode %d", offset,
ret, need_pad_zero);
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
"Failed to operate the dst memory at offset %ld, error-code %d pad mode %d",
offset, ret, need_pad_zero);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -213,7 +215,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
dst == nullptr,
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -235,7 +238,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
static_cast<size_t>(data_size));
} else {
if (protected_size < data_size) {
-GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+       "Failed to operate the dst memory, protected_size is %ld and size is %ld",
protected_size, data_size);
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -247,7 +251,8 @@ Status TransFormatHwcnToFz(const TransArgs &args, TransResult &result) {
}
}
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
dst_offset, ret, pad_zero);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
@@ -288,7 +293,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
dst == nullptr,
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;);
@@ -310,7 +316,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
static_cast<size_t>(data_size));
} else {
if (protected_size < data_size) {
-GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Failed to operate the dst memory, protected_size is %ld and size is %ld",
+GELOGE(ACL_ERROR_GE_PARAM_INVALID,
+       "Failed to operate the dst memory, protected_size is %ld and size is %ld",
protected_size, data_size);
return ACL_ERROR_GE_PARAM_INVALID;
}
@@ -322,7 +329,8 @@ Status TransFormatNhwcToFz(const TransArgs &args, TransResult &result) {
}
}
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d, pad mode %d",
dst_offset, ret, pad_zero);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
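
All of the memcpy_s reflows in these format transfers share one guard: protected_size is the room left in the destination after dst_offset, and the copy is rejected when it is smaller than data_size. A standalone sketch of that guard, using std::memcpy plus an explicit bounds check in place of the securec memcpy_s the real code calls (CheckedCopy is an illustrative name, not GE API):

#include <cstdint>
#include <cstring>
#include <vector>

// protected_size models the space remaining in dst after dst_offset; GE's
// memcpy_s enforces the same bound through its destMax argument.
bool CheckedCopy(std::vector<uint8_t> &dst, int64_t dst_offset,
                 const uint8_t *src, int64_t data_size) {
  if (dst_offset < 0 || data_size < 0 ||
      dst_offset > static_cast<int64_t>(dst.size())) {
    return false;
  }
  const int64_t protected_size = static_cast<int64_t>(dst.size()) - dst_offset;
  if (protected_size < data_size) {
    return false;  // mirrors the ACL_ERROR_GE_PARAM_INVALID branch above
  }
  std::memcpy(dst.data() + dst_offset, src, static_cast<size_t>(data_size));
  return true;
}

int main() {
  std::vector<uint8_t> dst(8, 0);
  const uint8_t src[4] = {1, 2, 3, 4};
  bool ok_in_bounds = CheckedCopy(dst, 4, src, 4);  // true: exactly fits
  bool ok_overflow = CheckedCopy(dst, 6, src, 4);   // false: only 2 bytes left
  return (ok_in_bounds && !ok_overflow) ? 0 : 1;
}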


ge/common/formats/format_transfers/format_transfer_fractal_zz.cc (+20 -10)

@@ -140,7 +140,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -179,7 +180,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -195,7 +197,8 @@ Status TransFormatFromNdToFracZz(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -217,7 +220,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size](), std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -257,7 +261,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size * w0));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -273,7 +278,8 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con
auto ret = memcpy_s(dst.get() + dst_offset, static_cast<size_t>(protected_size), args.data + src_offset,
static_cast<size_t>(size));
if (ret != EOK) {
-GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
+GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED,
+       "Failed to operate the dst memory at offset %ld, error-code %d", dst_offset, ret);
return ACL_ERROR_GE_MEMORY_OPERATE_FAILED;
}
}
@@ -288,14 +294,16 @@ Status TransFormatFromFracZzToNd(const TransArgs &args, TransResult &result, con

Status FormatTransferFractalZz::TransFormat(const TransArgs &args, TransResult &result) {
if (!IsDataTypeSupport(args.src_data_type)) {
GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
"Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
return ACL_ERROR_GE_DATATYPE_INVALID;
}
if (!CheckShape(args.src_format, args.src_shape) || !IsShapeValid(args.dst_shape)) {
GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
"Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -339,7 +347,8 @@ Status FormatTransferFractalZz::TransShape(Format src_format, const ShapeVector

Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult &result) {
if (!IsDataTypeSupport(args.src_data_type)) {
-GELOGE(ACL_ERROR_GE_DATATYPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+GELOGE(ACL_ERROR_GE_DATATYPE_INVALID,
+       "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());
@@ -347,7 +356,8 @@ Status FormatTransferFractalZzND::TransFormat(const TransArgs &args, TransResult
}

if (!IsShapeValid(args.src_shape) || !CheckShape(args.dst_format, args.dst_shape)) {
-GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
+GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+       "Not support trans format from %s to %s, src shape %s, dst shape %s, data type %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), ShapeToString(args.src_shape).c_str(),
ShapeToString(args.dst_shape).c_str(), TypeUtils::DataTypeToSerialString(args.src_data_type).c_str());


ge/common/formats/format_transfers/format_transfer_fracz_hwcn.cc (+3 -2)

@@ -66,7 +66,7 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
FmtToStr(ShapeToString(dst_shape));
GE_ERRORLOG_AND_ERRORMSG(ACL_ERROR_GE_SHAPE_INVALID, error.c_str());
return ACL_ERROR_GE_SHAPE_INVALID;
-}
+  }

return SUCCESS;
}
@@ -74,7 +74,8 @@ Status CheckArgsForFracZToHwcn(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


ge/common/formats/format_transfers/format_transfer_fracz_nchw.cc (+6 -4)

@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
}
int64_t c1 = Ceil(dst_shape.at(kNchwC), c0);
int64_t n0 = Ceil(dst_shape.at(kNchwN), static_cast<int64_t>(kNiSize));
-if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 || src_shape.at(kFracZC0) != c0 ||
-    src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+if (src_shape.at(kFracZHWC1) != dst_shape.at(kNchwH) * dst_shape.at(kNchwW) * c1 ||
+    src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+       "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
return ACL_ERROR_GE_SHAPE_INVALID;
}
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNchw(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


ge/common/formats/format_transfers/format_transfer_fracz_nhwc.cc (+7 -5)

@@ -59,9 +59,10 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
}
int64_t c1 = Ceil(dst_shape.at(kNhwcC), c0);
int64_t n0 = Ceil(dst_shape.at(kNhwcN), static_cast<int64_t>(kNiSize));
-if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 || src_shape.at(kFracZC0) != c0 ||
-    src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
-GELOGE(PARAM_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+if (src_shape.at(kFracZHWC1) != dst_shape.at(kNhwcH) * dst_shape.at(kNhwcW) * c1 ||
+    src_shape.at(kFracZC0) != c0 || src_shape.at(kFracZNi) != kNiSize || src_shape.at(kFracZN0) != n0) {
+GELOGE(PARAM_INVALID,
+       "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
return PARAM_INVALID;
}
@@ -72,7 +73,8 @@ Status CheckArgsForFracZToNhwc(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, int size, int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+GELOGE(OUT_OF_MEMORY,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return OUT_OF_MEMORY;
@@ -140,7 +142,7 @@ Status FormatTransferFracZNhwc::TransFormat(const TransArgs &args, TransResult &
}

GELOGE(INTERNAL_ERROR, "Get %ld total size from dst shape %s, src shape %s", total_size,
-ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
+       ShapeToString(args.dst_shape).c_str(), ShapeToString(args.src_shape).c_str());
return PARAM_INVALID;
}
GELOGD("Begin to trans format from FracZ to NHWC, src shape %s, data type %s, dst shape %s, memory size %ld",


ge/common/formats/format_transfers/format_transfer_hwcn_c1hwncoc0.cc (+2 -1)

@@ -91,7 +91,8 @@ Status CheckArgsForHwcnToC1hwncoc0(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


ge/common/formats/format_transfers/format_transfer_nc1hwc0_nchw.cc (+2 -1)

@@ -72,7 +72,8 @@ Status CheckArgsForNc1hwc0ToNchw(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(OUT_OF_MEMORY, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+GELOGE(OUT_OF_MEMORY,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return OUT_OF_MEMORY;


ge/common/formats/format_transfers/format_transfer_nc1hwc0_nhwc.cc (+4 -2)

@@ -61,7 +61,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
if (src_shape.at(kNc1hwc0H) != dst_shape.at(kNhwcH) || src_shape.at(kNc1hwc0W) != dst_shape.at(kNhwcW) ||
src_shape.at(kNc1hwc0N) != dst_shape.at(kNhwcN) || src_shape.at(kNc1hwc0C0) != c0 ||
src_shape.at(kNc1hwc0C1) != (Ceil(dst_shape.at(kNhwcC), c0))) {
-GELOGE(ACL_ERROR_GE_SHAPE_INVALID, "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
+GELOGE(ACL_ERROR_GE_SHAPE_INVALID,
+       "Failed to check relationship between src and dst shape, src shape %s, dst shape %s",
ShapeToString(src_shape).c_str(), ShapeToString(dst_shape).c_str());
return ACL_ERROR_GE_SHAPE_INVALID;
}
@@ -72,7 +73,8 @@ Status CheckArgsForNc1hwc0ToNhwc(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc (+10 -5)

@@ -125,7 +125,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {
return ACL_ERROR_GE_INTERNAL_ERROR);
auto t1 = h_o * w_o;
auto t2 = n_o * c_o;
-GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
return ACL_ERROR_GE_INTERNAL_ERROR);

int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
@@ -140,7 +141,8 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {

std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
-GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
+GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
+       "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -212,7 +214,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin
return ACL_ERROR_GE_INTERNAL_ERROR);
auto t1 = h_o * w_o;
auto t2 = n_o * c_o;
-GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
+GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2),
+                GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,"int64 mul overflow.A[%ld], B[%ld]", t1, t2);
return ACL_ERROR_GE_INTERNAL_ERROR);

int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
@@ -228,7 +231,8 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin

dst.reset(new (std::nothrow) uint8_t[dst_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), dst_size);
return ACL_ERROR_GE_MEMORY_ALLOCATION;
@@ -275,7 +279,8 @@ Status FormatTransferNchwToFZC04::TransFormat(const TransArgs &args, TransResult
}

std::vector<int64_t> expect_shape;
ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type, args_tmp.dst_format, expect_shape);
ret = TransShape(args_tmp.src_format, args_tmp.src_shape, args_tmp.src_data_type,
args_tmp.dst_format, expect_shape);
if (ret != SUCCESS) {
return ret;
}
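
The GE_IF_BOOL_EXEC guards in this file bail out when the h_o*w_o*n_o*c_o element count would overflow. A minimal sketch of such a guard for the non-negative operands used here, returning true when the product fits (an assumption about CheckInt64MulOverflow's contract; the real helper may differ):

#include <cstdint>

// Returns true when a * b fits in int64_t; valid for a, b >= 0 as used
// for shape products above.
inline bool Int64MulFits(int64_t a, int64_t b) {
  if (a == 0 || b == 0) {
    return true;
  }
  return a <= INT64_MAX / b;  // a, b > 0 here, so the division is safe
}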


+ 2
- 1
ge/common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.cc View File

@@ -92,7 +92,8 @@ Status CheckArgsForNhwcToNc1hwc0(const TransArgs &args) {
Status GetDstDataAfterTrans(const TransArgs &args, TransResult &result, const int size, const int64_t total_size) {
std::shared_ptr<uint8_t> dst(new (std::nothrow) uint8_t[total_size], std::default_delete<uint8_t[]>());
if (dst == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION,
"Failed to trans format from %s to %s, can not alloc the memory for dst buf %ld, shape %s",
TypeUtils::FormatToSerialString(args.src_format).c_str(),
TypeUtils::FormatToSerialString(args.dst_format).c_str(), total_size, ShapeToString(args.dst_shape).c_str());
return ACL_ERROR_GE_MEMORY_ALLOCATION;


+ 7
- 5
ge/common/helper/model_helper.cc View File

@@ -87,12 +87,13 @@ Status ModelHelper::SaveSizeToModelDef(const GeModelPtr &ge_model) {

std::shared_ptr<ModelTaskDef> model_task_def = ge_model->GetModelTaskDefPtr();
if (model_task_def == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Create model task def ptr failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
GELOGD("SaveSizeToModelDef task_info_size is 0.");
om_info.push_back(0);
} else {
size_t partition_task_size = model_task_def->ByteSizeLong();
GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
om_info.push_back(partition_task_size);
}
size_t partition_task_size = model_task_def->ByteSizeLong();
GELOGD("SaveSizeToModelDef task_info_size is %zu", partition_task_size);
om_info.push_back(partition_task_size);

GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListInt(*(ge_model.get()), "om_info_list", om_info),
GELOGE(FAILED, "SetListInt of om_info_list failed.");
@@ -598,6 +599,7 @@ Status ModelHelper::GenerateGeRootModel(OmFileLoadHelper &om_load_helper) {
is_first_model = false;
root_model_->SetRootGraph(GraphUtils::GetComputeGraph(cur_model->GetGraph()));
root_model_->SetModelId(cur_model->GetModelId());
root_model_->SetModelName(cur_model->GetName());
model_ = cur_model;
continue;
}


+ 1
- 1
ge/common/profiling/profiling_manager.cc View File

@@ -31,7 +31,7 @@ const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";

#ifdef DAVINCI_SUPPORT_PROFILING
const size_t kReportMaxLen = 2048;
const size_t kReportMaxLen = 1024;
const int32_t kMaxDeviceNum = 256;
const uint32_t kInteval = 2;
const std::string kConfigNumsdev = "devNums";


+ 11
- 0
ge/common/tbe_kernel_store.cc View File

@@ -15,6 +15,8 @@
*/

#include "common/tbe_kernel_store.h"
#include "graph/utils/attr_utils.h"
#include "graph/debug/ge_attr_define.h"

namespace ge {

@@ -31,6 +33,15 @@ void TBEKernelStore::LoadTBEKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc>
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, kernel_bin),
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for kernel_bin failed");)
GELOGI("Load tbe kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize());

std::string atomic_kernel_name;
(void) AttrUtils::GetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, atomic_kernel_name);
if (!atomic_kernel_name.empty()) {
GELOGI("Get atomic kernel name is %s.", atomic_kernel_name.c_str());
auto atomic_kernel_bin = FindKernel(atomic_kernel_name);
GE_IF_BOOL_EXEC(!op_desc->SetExtAttr(EXT_ATTR_ATOMIC_TBE_KERNEL, atomic_kernel_bin),
GELOGW("LoadKernelTBEBinToOpDesc: SetExtAttr for atomic kernel_bin failed");)
}
}
}
}
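
The addition above resolves a second kernel, the atomic-clean companion, by the name stored in ATOMIC_ATTR_TBE_KERNEL_NAME and attaches it to the op as an extended attribute. A condensed sketch of the lookup half of that pattern (the map stands in for the kernel store; GE's attribute plumbing is omitted):

#include <map>
#include <memory>
#include <string>

struct Kernel {};  // stand-in for ge::OpKernelBin

// An empty name means the op has no atomic-clean companion, so nothing
// is looked up or attached.
std::shared_ptr<Kernel> FindAtomicKernel(
    const std::map<std::string, std::shared_ptr<Kernel>> &store,
    const std::string &atomic_kernel_name) {
  if (atomic_kernel_name.empty()) {
    return nullptr;
  }
  auto it = store.find(atomic_kernel_name);
  return it == store.end() ? nullptr : it->second;
}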


+ 3
- 2
ge/executor/CMakeLists.txt View File

@@ -8,6 +8,7 @@ set(PROTO_LIST
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge_static PROTO_STATIC_SRCS PROTO_STATIC_HDRS ${PROTO_LIST})

set(SRC_LIST
"ge_executor.cc"
@@ -162,7 +163,7 @@ set(SRC_LIST
)

######## libge_executor.a ########
add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_HDRS})
add_library(ge_executor STATIC ${SRC_LIST} ${PROTO_STATIC_HDRS})

target_compile_options(ge_executor PRIVATE
$<$<OR:$<STREQUAL:${TARGET_SYSTEM_NAME},Linux>,$<STREQUAL:${TARGET_SYSTEM_NAME},Android>>:-fvisibility=hidden -O2 -Werror -Wno-deprecated-declarations -fno-common>
@@ -191,7 +192,7 @@ target_include_directories(ge_executor SYSTEM PRIVATE
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
${CMAKE_BINARY_DIR}/proto/ge_static
#### yellow zone ####
${GE_CODE_DIR}/../inc
${GE_CODE_DIR}/../inc/cce


+ 39
- 0
ge/executor/ge_executor.cc View File

@@ -30,6 +30,8 @@
#include "single_op/single_op_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
#include "graph/opsproto_manager.h"
#include "ge_local_engine/engine/host_cpu_engine.h"

using std::string;
using std::vector;
@@ -199,6 +201,33 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
namespace ge {
bool GeExecutor::isInit_ = false;

static void InitOpsProtoManager() {
string opsproto_path;
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
string file_path = RealPath(path.c_str());
if (file_path.empty()) {
GELOGE(FAILED, "[Check][EnvPath]ASCEND_OPP_PATH path [%s] is invalid.", path.c_str());
REPORT_INPUT_ERROR("E68016", {"ASCEND_OPP_PATH", path});
return;
}
opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
GELOGI("Get opsproto so path from env : %s", path.c_str());
} else {
string path_base = PluginManager::GetPath();
GELOGI("path_base is %s", path_base.c_str());
path_base = path_base.substr(0, path_base.rfind('/'));
path_base = path_base.substr(0, path_base.rfind('/') + 1);
opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
}
GELOGI("Get opsproto path is %s", opsproto_path.c_str());
OpsProtoManager *manager = OpsProtoManager::Instance();
map<string, string> option_tmp;
option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
(void)manager->Initialize(option_tmp);
}

GeExecutor::GeExecutor() {}

Status GeExecutor::Initialize() {
@@ -208,6 +237,16 @@ Status GeExecutor::Initialize() {
return ge::SUCCESS;
}

OpTilingManager::GetInstance().LoadSo();

Status init_hostcpu_engine_status = HostCpuEngine::GetInstance().Initialize();
if (init_hostcpu_engine_status != SUCCESS) {
GELOGE(init_hostcpu_engine_status, "Failed to initialize HostCpuEngine");
return init_hostcpu_engine_status;
}

InitOpsProtoManager();

std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
mem_type.push_back(RT_MEMORY_P2P_DDR);
auto ret = MemManager::Instance().Initialize(mem_type);
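
InitOpsProtoManager above composes the op-proto search path with custom protos ahead of built-ins, taking the base from ASCEND_OPP_PATH when set and from the plugin directory otherwise. A minimal sketch of that composition (the fallback base below is illustrative; the diff derives it from PluginManager::GetPath()):

#include <cstdlib>
#include <string>

std::string OpsProtoPath() {
  const char *env = std::getenv("ASCEND_OPP_PATH");
  // Illustrative fallback; the real code walks up from the plugin path.
  const std::string base =
      (env != nullptr) ? std::string(env) + "/op_proto" : std::string("ops/op_proto");
  return base + "/custom/:" + base + "/built-in/";  // custom searched first
}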


+ 8
- 6
ge/ge_local_engine/CMakeLists.txt View File

@@ -20,6 +20,8 @@ set(OPS_KERNEL_SRC_LIST
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge_ops_shared PROTO_OPS_SHARED_SRCS PROTO_OPS_SHARED_HDRS ${PROTO_LIST})
protobuf_generate(ge_ops_static PROTO_OPS_STATIC_SRCS PROTO_OPS_STATIC_HDRS ${PROTO_LIST})

############ libge_local_engine.so ############
add_library(ge_local_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
@@ -119,7 +121,7 @@ set_target_properties(atc_ge_local_engine PROPERTIES
)

############ libge_local_opskernel_builder.so ############
add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
add_library(ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS})

target_compile_options(ge_local_opskernel_builder PRIVATE
-Werror
@@ -143,7 +145,7 @@ target_include_directories(ge_local_opskernel_builder PRIVATE
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
${CMAKE_BINARY_DIR}/proto/ge_ops_shared
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
@@ -166,7 +168,7 @@ target_link_libraries(ge_local_opskernel_builder PRIVATE
)

############ atclib/libge_local_opskernel_builder.so ############
add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
add_library(atc_ge_local_opskernel_builder SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_SHARED_HDRS})

target_compile_options(atc_ge_local_opskernel_builder PRIVATE
-Werror
@@ -190,7 +192,7 @@ target_include_directories(atc_ge_local_opskernel_builder PRIVATE
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
${CMAKE_BINARY_DIR}/proto/ge_ops_shared
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####
@@ -218,7 +220,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
)

############ libge_local_opskernel_builder.a ############
add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_OPS_STATIC_HDRS})

target_compile_options(ge_local_opskernel_builder_static PRIVATE
-Werror
@@ -243,7 +245,7 @@ target_include_directories(ge_local_opskernel_builder_static PRIVATE
${METADEF_DIR}/inc/external/graph
${METADEF_DIR}/inc/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
${CMAKE_BINARY_DIR}/proto/ge_ops_static
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####


+ 86
- 24
ge/generator/ge_generator.cc View File

@@ -67,6 +67,9 @@ bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
}
return false;
}
bool IsOptional(const ge::GeTensorDesc &tensor_desc) {
return tensor_desc.GetFormat() == ge::FORMAT_RESERVED && tensor_desc.GetDataType() == ge::DT_UNDEFINED;
}
} // namespace

namespace ge {
@@ -154,7 +157,7 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
}

static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const GeTensorDesc &tensor, int32_t index,
bool attr) {
bool attr, int32_t &data_index) {
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);

@@ -197,9 +200,10 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
"[Add][InputDesc]fail for node:%s", data_op->GetName().c_str());
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str());
if (attr) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED,
if (attr && !is_const) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, data_index), return FAILED,
"[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str());
++data_index;
}

ge::NodePtr arg_node = graph->AddNode(data_op);
@@ -565,6 +569,44 @@ bool GeGenerator::Impl::SetOmSystemInfo(AttrHolder &obj) {
return true;
}

Status GeGenerator::SetModelNameForDump(const GeRootModelPtr &ge_root_model) {
bool is_unknown_shape = false;
Status ret = ge_root_model->CheckIsUnknownShape(is_unknown_shape);
if (ret != SUCCESS) {
GELOGE(FAILED, "[Check][IsUnknownShape]Check root model is unknown shape failed, model id:%u",
ge_root_model->GetModelId());
REPORT_CALL_ERROR("E19999", "Check root model is unknown shape failed, model id:%zu",
ge_root_model->GetModelId());
return FAILED;
}
GeModelPtr model_root = nullptr;
if (is_unknown_shape) {
model_root = MakeShared<GeModel>();
GE_CHECK_NOTNULL(model_root);
model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph()));
ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root);
}

ModelHelper model_helper;
string model_name;
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "[Check][GetModelNameStep]Get model_name failed. Param --output is invalid, root graph name: %s",
ge_root_model->GetRootGraph()->GetName().c_str());
REPORT_CALL_ERROR("E19999", "Get model_name failed. Param --output is invalid, root graph name: %s",
ge_root_model->GetRootGraph()->GetName().c_str());
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_CHECK_NOTNULL(ge_model);
ge_model->SetName(model_name);
return SUCCESS;
}

Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_prefix, const vector<GeTensor> &inputs,
ModelBufferData &model, bool is_offline) {
rtContext_t ctx = nullptr;
@@ -599,20 +641,10 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
}

GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
ret = SetModelNameForDump(ge_root_model);
if (ret != SUCCESS) {
return ret;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");
@@ -663,6 +695,34 @@ namespace {
}
return SUCCESS;
}

bool CheckNoAicore(const ComputeGraphPtr &graph) {
for (const auto &node : graph->GetDirectNode()) {
if (node == nullptr) {
continue;
}
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
if (op_desc->GetOpEngineName() == kAIcoreEngine) {
return false;
}
}
return true;
}
}

void GeGenerator::RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
for (auto &input : inputs) {
GeTensorDesc input_desc = input.GetTensorDesc();
bool is_const = false;
(void)AttrUtils::GetBool(input_desc, CONST_ATTR_NAME_INPUT, is_const);
bool is_optional = IsOptional(input_desc);
if (!is_optional && !is_const) {
outputs.emplace_back(input);
}
}
}

Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
@@ -729,7 +789,9 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GELOGI("ATC parser success in single op build.");

GeRootModelPtr ge_root_model = nullptr;
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, inputs, ge_root_model));
vector<GeTensor> data_inputs;
RemoveConst(inputs, data_inputs);
GE_CHK_STATUS_RET_NOLOG(impl_->BuildModel(graph, data_inputs, ge_root_model));
map<string, GeAttrValue> op_attrs = op_desc_tmp->GetAllAttrs();
GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
@@ -745,7 +807,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in

bool all_shape = false;
(void)AttrUtils::GetBool(op_desc, kAicpuAllshape, all_shape);
if (all_shape) {
if (all_shape && CheckNoAicore(root_graph)) {
GELOGD("Get aicpu all_shape kernel!");
vector<GeTensor> inputs_dynamic;
vector<GeTensor> outputs_dynamic;
@@ -812,18 +874,19 @@ Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor

// 2. Create InputData node.
int32_t arg_index = 0;
int32_t data_index = 0;
if (inputs.empty()) {
for (const auto &input_desc : op_desc->GetAllInputsDescPtr()) {
GE_CHECK_NOTNULL_EXEC(input_desc, return INTERNAL_ERROR);
if (!IsNeedConnectInputOpForSingleOp(*input_desc)) {
continue;
}
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, *input_desc, arg_index, false, data_index));
arg_index++;
}
} else {
for (const auto &in_desc : inputs) {
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true));
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, in_desc.GetTensorDesc(), arg_index, true, data_index));
arg_index++;
}
}
@@ -882,13 +945,12 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo
"ge root model has no sub model")
GeModelPtr model_root = nullptr;
if (is_unknown_shape) {
model_root = make_shared<GeModel>();
model_root->SetGraph(GraphUtils::CreateGraphFromComputeGraph(ge_root_model->GetRootGraph()));
ge_root_model->SetSubgraphInstanceNameToModel(ge_root_model->GetRootGraph()->GetName(), model_root);
model_root->SetName(ge_root_model->GetRootGraph()->GetName());
auto name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
model_root = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
} else {
model_root = ge_root_model->GetSubgraphInstanceNameToModel().begin()->second;
}
GE_CHECK_NOTNULL(model_root);
// set atc version
if (!SetAtcVersionInfo(*(model_root.get()))) {
GELOGW("SetPackageVersionInfo of atc failed!");


+ 0
- 55
ge/graph/build/graph_builder.cc View File

@@ -382,58 +382,6 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
}

static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
GE_CHECK_NOTNULL(out_anchor);
NodePtr in_node = out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);
OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
.AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
.Build();
(void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
return FAILED;
}
return SUCCESS;
}

static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
if (graph->GetGraphUnknownFlag()) {
GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
return SUCCESS;
}
for (auto &node : graph->GetDirectNode()) {
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
auto op_desc = node->GetOpDesc();
if (op_desc == nullptr) {
continue;
}
auto op_type = op_desc->GetType();
if (op_type == NETOUTPUT) {
for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
NodePtr in_node = peer_out_anchor->GetOwnerNode();
GE_CHECK_NOTNULL(in_node);

std::string in_node_op_type = in_node->GetType();
if (in_node_op_type == CONSTANT) {
GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
in_node->GetName().c_str(), node->GetName().c_str());
return FAILED;
}
}
}
}
}
return SUCCESS;
}

Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false);
@@ -516,9 +464,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
!sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue;
}

GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");

if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),


+ 93
- 71
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -597,11 +597,13 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
int64_t size = 0;
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
GE_IF_BOOL_EXEC(size < 0,
GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s",
size, node_op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, maybe it is unknown shape node, Node_name:%s",
size, node_op_desc->GetName().c_str());
return;);
GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, "
"maybe it is unknown shape node, Node_name:%s",
size, node_op_desc->GetName().c_str());
REPORT_INNER_ERROR("E19999", "tensor_size:%ld is invalid, "
"maybe it is unknown shape node, Node_name:%s",
size, node_op_desc->GetName().c_str());
return;);
batch_all_memory_size[batch_label].emplace_back(size);
if (batch_total_size.find(batch_label) == batch_total_size.end()) {
batch_total_size[batch_label] = size;
@@ -692,23 +694,23 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
auto out_anchor = n->GetOutDataAnchor(out_index);
GE_IF_BOOL_EXEC(out_anchor == nullptr,
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] anchor is null.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
REPORT_INNER_ERROR("E19999", "output anchor is null, node_name: %s output_index: %u.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
return false;);
for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
GE_IF_BOOL_EXEC(peer_in_anchor == nullptr,
GELOGE(FAILED, "[Check][Anchor]Node[%s] output[%u] peer_in_anchor 0 is null.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
REPORT_INNER_ERROR("E19999", "output anchor peer is null, node_name: %s output_index: %u.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
return false;);
auto peer_node = peer_in_anchor->GetOwnerNode();
GE_IF_BOOL_EXEC(peer_node == nullptr,
GELOGE(FAILED, "[Check][Node]Node[%s] output[%u] peer node is null.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
REPORT_INNER_ERROR("E19999", "output anchor peer node is null, node_name: %s output_index: %u.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
return false;);

// Get the continuous input type of the node, default is false
@@ -716,9 +718,9 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou
auto peer_in_node_desc = peer_node->GetOpDesc();
GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr,
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] nodedesc is null.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
REPORT_INNER_ERROR("E19999", "output anchor peer op_desc is null, node_name:%s output_index:%u.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
return false;);

// If GetBool fail, is_input_continuous is false.
@@ -819,7 +821,7 @@ bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &
(in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) ||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) {
GELOGE(FAILED, "[Check][OpDesc]Node[%s] output[%u] peer input node desc is null.",
n->GetName().c_str(), out_index);
n->GetName().c_str(), out_index);
REPORT_INNER_ERROR("E19999", "get output anchor peer op_desc fail, node_name: %s output_index: %u.",
n->GetName().c_str(), out_index);
return false;
@@ -1105,9 +1107,10 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
OpMemoryType mem_type, const NodePtr &n, uint32_t out_index,
const vector<bool> &workspace_reuse_flag, const bool is_op_reuse_mem,
const bool continuous, int64_t memory_type) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null, apply memory failed");
return nullptr, "[Check][Param]Input parameter n(type:node_ptr) is null.");
auto node_op_desc = n->GetOpDesc();
GE_IF_BOOL_EXEC(node_op_desc == nullptr, return nullptr);
std::string batch_label;
@@ -1159,10 +1162,12 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
}

auto block = new (std::nothrow) MemoryBlock(block_size, node_op_desc->GetStreamId(), is_reuse_memory, memory_type);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
n->GetName().c_str(), out_index);
return nullptr, "[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
block == nullptr,
REPORT_INNER_ERROR("E19999", "new a memoryblock object failed. node_name:%s out_index:%u",
n->GetName().c_str(), out_index);
return nullptr,
"[New][Object]new MemoryBlock failed, node_name:%s out_index:%u", n->GetName().c_str(), out_index);

// Data and netoutput need zero copy block
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous);
@@ -1221,13 +1226,15 @@ void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutpu

Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges,
const bool is_op_reuse_mem) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:node_ptr) is null");
return INTERNAL_ERROR, "[check][param]Input parameter n(type:NodePtr) is null.");
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
node_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
return INTERNAL_ERROR, "[Check][Param]Input parameter n(type:OpDescPtr) is null");

// continuous output support ref only when all output ref input
bool isAllOutputRef = true;
@@ -1242,7 +1249,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in

if (!isAllOutputRef && isOutputHasRef) {
REPORT_INNER_ERROR("E19999", "continuous output node ref part input, not support now. node_name:%s",
n->GetName().c_str());
n->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Check][OutRefStatus]continuous output node ref part input, not support, node_name:%s",
n->GetName().c_str());
return INTERNAL_ERROR;
@@ -1255,7 +1262,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
if (output_op_desc == nullptr) {
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
n->GetName().c_str(), index);
n->GetName().c_str(), index);
GELOGE(INTERNAL_ERROR, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
return INTERNAL_ERROR;
}
@@ -1268,7 +1275,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
int64_t size = 0;
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) {
REPORT_CALL_ERROR("E19999", "get tensor_size failed, node_name:%s, output_index:%u",
n->GetName().c_str(), index);
n->GetName().c_str(), index);
GELOGE(INTERNAL_ERROR, "[Get][TensorSize]node_name:%s, output_index:%u", n->GetName().c_str(), index);
return INTERNAL_ERROR;
}
@@ -1310,7 +1317,7 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in
++(block->ref_count_);
} else {
REPORT_CALL_ERROR("E19999", "apply continuousMemory failed, node_name:%s, total_size:%ld",
n->GetName().c_str(), total_size);
n->GetName().c_str(), total_size);
GELOGE(INTERNAL_ERROR, "[Apply][ContinuousMemory]node_name:%s, total_size:%ld", n->GetName().c_str(), total_size);
return INTERNAL_ERROR;
}
@@ -1319,26 +1326,33 @@ Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<in

MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges,
const bool is_op_reuse_mem, const bool continuous) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
n == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:NodePtr) is null");
return nullptr, "[Check][Param]Input parameter n(type:NodePtr) is null");
auto node_op_desc = n->GetOpDesc();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
node_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "Input parameter n(type:OpDescPtr) is null");
return nullptr, "[Check][Param]Input parameter n(type:OpDescPtr) is null");
MemoryBlock *block = nullptr;
NodeIndexIO node_index_io(n, index, kOut);
int64_t size = 0;
auto output_op_desc = node_op_desc->GetOutputDescPtr(index);
GE_IF_BOOL_EXEC(output_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
return nullptr);
GE_IF_BOOL_EXEC(
output_op_desc == nullptr,
REPORT_INNER_ERROR("E19999", "get output_desc failed, node_name:%s, output_index:%u",
n->GetName().c_str(), index);
GELOGE(FAILED, "[Get][OutputDesc]node_name:%s, output_index:%u", n->GetName().c_str(), index);
return nullptr);
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed"));
size_t no_align_size = 0;
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index);
return nullptr, "[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS,
REPORT_CALL_ERROR("E19999", "Get no align size failed, node_name:%s, output_index:%u",
n->GetName().c_str(), index);
return nullptr,
"[Get][TensorSize]Get no align size, node_name:%s, output_index:%u", n->GetName().c_str(), index);

std::string symbol;
bool reuse_input = false;
@@ -1346,9 +1360,9 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
block = symbol_blocks_[symbol];
GE_IF_BOOL_EXEC(block == nullptr,
REPORT_INNER_ERROR("E19999", "get ref block failed, node_name:%s, symbol:%s",
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
GELOGE(FAILED, "[Get][RefBlock]node_name:%s, symbol:%s",
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
node_op_desc->GetName().c_str(), node_index_io.ToString().c_str());
return nullptr);
// reduce old size
size_t align_size = block->Size();
@@ -1392,24 +1406,28 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index,
vector<bool> workspace_reuse_flag;
block = ApplyMemory(block_size, size, no_align_size, kOutput, n, index,
workspace_reuse_flag, is_op_reuse_mem, continuous, memory_type);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(block == nullptr,
REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
n->GetName().c_str(), block_size, index);
return nullptr, "[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
block == nullptr,
REPORT_CALL_ERROR("E19999", "apply out Memory failed, node_name:%s, block_size:%ld, out_index:%u",
n->GetName().c_str(), block_size, index);
return nullptr,
"[Apply][Memory]node_name:%s, block_size:%ld, out_index:%u",
n->GetName().c_str(), block_size, index);
}
int out_count = 0;
GE_IF_BOOL_EXEC(index >= n->GetAllOutDataAnchors().size(),
REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
return nullptr);
GE_IF_BOOL_EXEC(
index >= n->GetAllOutDataAnchors().size(),
REPORT_INNER_ERROR("E19999", "out index:%u exceed out_size:%lu, node_name:%s",
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
GELOGE(FAILED, "[Check][OutIndex]index:%u exceed out_size:%lu, node_name:%s",
index, n->GetAllOutDataAnchors().size(), n->GetName().c_str());
return nullptr);
auto out_data_anchor = n->GetOutDataAnchor(index);
GE_IF_BOOL_EXEC(out_data_anchor == nullptr,
REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
return nullptr);
GE_IF_BOOL_EXEC(
out_data_anchor == nullptr,
REPORT_INNER_ERROR("E19999", "out anchor is null, index:%u, node_name:%s", index, n->GetName().c_str());
GELOGE(FAILED, "[Check][OutAnchor]is null, index:%u, node_name:%s", index, n->GetName().c_str());
return nullptr);
for (const auto &in_anchor : out_data_anchor->GetPeerInDataAnchors()) {
auto owner_node = in_anchor->GetOwnerNode();
auto op_desc = owner_node->GetOpDesc();
@@ -1616,12 +1634,13 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
REPORT_INNER_ERROR("E19999", "Attr[%s] size:%zu not equal to node output size:%zu, node_name:%s",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetName().c_str());
GELOGE(INTERNAL_ERROR,
"[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetName().c_str());
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetName().c_str());
GELOGE(
INTERNAL_ERROR,
"[Check][MemTypeAttr]Attr %s size:%zu not equal to node output size:%zu, node_name:%s",
ATTR_NAME_OUTPUT_MEM_TYPE_LIST.c_str(), memorys_type.size(),
op_desc->GetOutputsSize(), op_desc->GetName().c_str());
return INTERNAL_ERROR;
}

@@ -1748,9 +1767,11 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {

if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) {
REPORT_INNER_ERROR("E19999", "Attr[%s]size:%zu is not equal to workspace size:%zu, node_name:%s",
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
temp.size(), n->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Check][Attr]Attr %s size:%zu is not equal to workspace size:%zu, node_name:%s",
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(), temp.size(), n->GetName().c_str());
TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), tvm_workspace_memory_type.size(),
temp.size(), n->GetName().c_str());
return;
}
for (size_t i = 0; i < temp.size(); i++) {
@@ -2160,10 +2181,11 @@ bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index,
ge::AttrUtils::GetListInt(op_desc, TVM_ATTR_NAME_WORKSPACE_TYPE, workspace_memory_type);
if (has_workspace_mem_type_attr && (workspace_memory_type.size() <= index)) {
REPORT_INNER_ERROR("E19999", "get workspace mem_type failed, "
"index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
"index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(),
workspace_memory_type.size(), node->GetName().c_str());
GELOGE(INTERNAL_ERROR, "[Get][WorkspaceMemType]index %zu invalid, bigger than attr %s size:%zu, node_name:%s",
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
index, TVM_ATTR_NAME_WORKSPACE_TYPE.c_str(), workspace_memory_type.size(), node->GetName().c_str());
return false;
}
memory_type = has_workspace_mem_type_attr ? workspace_memory_type[index] : RT_MEMORY_HBM;
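
Most hunks in this file only re-indent call sites of GE's check macros. For readers unfamiliar with the idiom, a sketch of the shape GE_IF_BOOL_EXEC plausibly takes (an assumption; the real definition lives in GE's debug headers):

// Statement-like macro: the second argument may be a sequence such as
// "GELOGE(...); return nullptr;", which expands inside the if-block.
#define GE_IF_BOOL_EXEC(expr, exec_expr) \
  {                                      \
    if (expr) {                          \
      exec_expr;                         \
    }                                    \
  }

This is why the re-wrapped call sites above can carry logging, error reporting and an early return as a single macro argument.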


+ 3
- 3
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -496,7 +496,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
REPORT_INNER_ERROR("E19999", "find memory offset fail for mem_type:%ld, "
"when assign continuous input memory for node:%s, ", memory_type, node->GetName().c_str());
GELOGE(FAILED, "[Find][MemOffset]fail for mem_type:%ld, when AssignContinuousInputMemory for node:%s",
memory_type, node->GetName().c_str());
memory_type, node->GetName().c_str());
return FAILED;
}
// The head and tail of hcom continuous input should be added 512
@@ -929,8 +929,8 @@ Status GraphMemoryAssigner::AssignReferenceMemory() {

if (out_op_desc->GetOutputsSize() > output_list.size()) {
REPORT_INNER_ERROR("E19999", "Output size:%zu more than output offset size:%zu, judge invalid in node:%s "
"when AssignReferenceMemory",
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
"when AssignReferenceMemory",
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
GELOGE(ge::FAILED, "[Check][InnerData]Output size:%zu more than output offset size:%zu, invalid in node:%s",
out_op_desc->GetOutputsSize(), output_list.size(), node->GetName().c_str());
return ge::FAILED;


+ 46
- 0
ge/graph/build/model_builder.cc View File

@@ -574,6 +574,50 @@ Status ModelBuilder::MergeWeights() {
return SUCCESS;
}

Status ModelBuilder::SaveAtomicTBEKernel(const OpDescPtr &op_desc) {
ge::NodePtr atomic_clean_node = nullptr;
atomic_clean_node = op_desc->TryGetExtAttr("atomic_clean_node_ptr", atomic_clean_node);
if (atomic_clean_node == nullptr) {
return SUCCESS;
}

ge::OpDescPtr atomic_op_desc = atomic_clean_node->GetOpDesc();
GE_CHECK_NOTNULL(atomic_op_desc);
TBEKernelPtr tbe_kernel = atomic_op_desc->TryGetExtAttr(ge::OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
std::string kernel_name;
GeAttrValue::BYTES kernel_buffer;
(void) AttrUtils::GetStr(atomic_op_desc, ATTR_NAME_TBE_KERNEL_NAME, kernel_name);
(void) AttrUtils::GetBytes(atomic_op_desc, ATTR_NAME_TBE_KERNEL_BUFFER, kernel_buffer);
if (!kernel_name.empty() && (kernel_buffer.GetSize() > 0)) {
GE_CHECK_NOTNULL(kernel_buffer.GetData());
std::vector<char> data(kernel_buffer.GetData(), kernel_buffer.GetData() + kernel_buffer.GetSize());
tbe_kernel = MakeShared<OpKernelBin>(kernel_name, std::move(data));
GE_CHECK_NOTNULL(tbe_kernel);
}
}
if (tbe_kernel == nullptr) {
GELOGD("Atomic_clean_node doesn't have tbe_kernel.");
return SUCCESS;
}
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
GELOGD("Atomic_clean_node tbe_kernel_name %s!", tbe_kernel->GetName().c_str());
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TBE_KERNEL_NAME, tbe_kernel->GetName());

std::string kernel_name;
(void) AttrUtils::GetStr(atomic_op_desc, atomic_op_desc->GetName() + "_kernelname", kernel_name);
(void) AttrUtils::SetStr(op_desc, op_desc->GetName() + "_atomic_kernelname", kernel_name);

std::string meta_data;
(void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_METADATA, meta_data);
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_METADATA, meta_data);

std::string json_string;
(void) AttrUtils::GetStr(atomic_op_desc, TVM_ATTR_NAME_MAGIC, json_string);
(void) AttrUtils::SetStr(op_desc, ATOMIC_ATTR_TVM_MAGIC, json_string);
return SUCCESS;
}

Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
// Add weight
ge_model.SetWeight(weight_buffer_);
@@ -607,6 +651,8 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
}
tbe_name_set.insert(tbe_kernel->GetName());
tbe_kernel_store_.AddTBEKernel(tbe_kernel);

GE_CHK_STATUS_RET(SaveAtomicTBEKernel(node_op_desc), "[Save][TBEKernel] save atomic tbekernel failed!");
}

SetModelCheckAicpuAttr(model, aicpu_op_types, aicpu_tf_op_types);
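
SaveAtomicTBEKernel above falls back to reconstructing the kernel binary from (name, bytes) attributes when the atomic-clean op carries no ready-made kernel object. A simplified sketch of that reconstruction (KernelBin stands in for ge::OpKernelBin):

#include <cstddef>
#include <memory>
#include <string>
#include <vector>

struct KernelBin {  // stand-in for ge::OpKernelBin
  std::string name;
  std::vector<char> data;
};

std::shared_ptr<KernelBin> MakeKernelBin(const std::string &name,
                                         const char *buf, size_t size) {
  if (name.empty() || buf == nullptr || size == 0) {
    return nullptr;  // mirrors the empty-name / zero-size guard above
  }
  return std::make_shared<KernelBin>(
      KernelBin{name, std::vector<char>(buf, buf + size)});
}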


+ 2
- 0
ge/graph/build/model_builder.h View File

@@ -89,6 +89,8 @@ class ModelBuilder {
void SetModelCheckAicpuAttr(ge::Model &model, std::set<std::string> &aicpu_op_types,
std::set<std::string> &aicpu_tf_op_types);

Status SaveAtomicTBEKernel(const OpDescPtr &op_desc);

uint64_t session_id_;

map<int64_t, size_t> mem_type_to_mem_offset_;


+ 6
- 2
ge/graph/build/task_generator.cc View File

@@ -49,6 +49,7 @@ const char *const kIsLastNode = "is_last_node";
const char *const kIsInputVar = "INPUT_IS_VAR";
const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kIteratorV2 = "IteratorV2";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@@ -57,6 +58,7 @@ const uint64_t kProfilingArEndLogid = 4;
const uint64_t kProfilingIterEndLogid = 65535;
const int64_t kHashFactor = 100000;
const int64_t kInvalidGroupId = -1;
const std::set<std::string> kFpNodeTypes = {ge::DATA, ge::GETNEXT, kIteratorV2};
} // namespace
namespace ge {
TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
@@ -621,8 +623,10 @@ Status TaskGenerator::AutoFindFpOpIndex(const ComputeGraphPtr &graph, ProfilingP
if (op_kernel_lib_name.empty()) {
continue;
}

if (op_desc->GetType() == GETNEXT || op_desc->GetType() == DATA) {
auto type = op_desc->GetType();
std::string original_type;
(void)AttrUtils::GetStr(op_desc, ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE, original_type);
if (kFpNodeTypes.find(type) != kFpNodeTypes.end() || kFpNodeTypes.find(original_type) != kFpNodeTypes.end()) {
auto out_anchor = node->GetOutDataAnchor(0);
for (auto &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) {
GE_CHECK_NOTNULL(peer_in_anchor);
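
The change above generalizes FP-point detection: a node now qualifies if either its type or its original framework type (ATTR_NAME_FRAMEWORK_ORIGINAL_TYPE) matches a data-feeding op, which lets IteratorV2 nodes be found as well. A condensed sketch of the test (the literal type strings are assumptions for illustration):

#include <set>
#include <string>

bool IsFpBoundary(const std::string &type, const std::string &original_type) {
  static const std::set<std::string> kFpNodeTypes = {"Data", "GetNext", "IteratorV2"};
  return kFpNodeTypes.count(type) > 0 || kFpNodeTypes.count(original_type) > 0;
}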


+ 77
- 3
ge/graph/execute/graph_execute.cc View File

@@ -20,9 +20,12 @@
#include <string>

#include "graph/load/model_manager/model_manager.h"
#include "graph/load/model_manager/davinci_model.h"
#include "omm/csa_interact.h"

namespace ge {
using Uint32Pair = pair<uint32_t, uint32_t>;
const uint32_t kInvalidModelId = UINT32_MAX;
GraphExecutor::GraphExecutor()
: init_flag_(false),
train_graph_flag_(false),
@@ -358,7 +361,8 @@ Status GraphExecutor::ExecuteGraph(GraphId graph_id, const GeRootModelPtr &ge_ro
}

Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
const std::vector<InputTensorInfo> &input_tensor) {
const std::vector<InputTensorInfo> &input_tensor,
const RunAsyncCallback& callback) {
GELOGI("[GraphExecutor] Start to async execute graph, graph_id=%u", graph_id);
if (graph_id != last_graph_id_) {
auto ret = FreeExecuteMemory();
@@ -368,7 +372,7 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
}
last_graph_id_ = graph_id;
GE_CHECK_NOTNULL_EXEC(ge_root_model, return FAILED);
Status ret = AsyncExecuteModel(ge_root_model->GetModelId(), input_tensor);
Status ret = AsyncExecuteModel(ge_root_model, input_tensor, callback);
if (ret != SUCCESS) {
GELOGE(GE_GRAPH_SYNC_MODEL_FAILED, "[GraphExecutor] AsyncExecuteModel Error!");
return GE_GRAPH_SYNC_MODEL_FAILED;
@@ -378,11 +382,81 @@ Status GraphExecutor::ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &
return SUCCESS;
}

Status GraphExecutor::AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &inputs) {
bool CompareByLoad(const Uint32Pair &lhs, const Uint32Pair &rhs) {
return lhs.second < rhs.second;
}

uint32_t GraphExecutor::GetExecuteModelId(const GeRootModelPtr &ge_root_model) {
std::vector<uint32_t> model_ids = ge_root_model->GetAllModelId();
if (model_ids.empty()) {
return kInvalidModelId;
}
if (model_ids.size() == 1) {
return ge_root_model->GetModelId();
}
std::vector<Uint32Pair> model_id_to_loads;
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
for (auto model_id : model_ids) {
auto davinci_model = model_manager->GetModel(model_id);
auto hybrid_model = model_manager->GetHybridModel(model_id);
if (hybrid_model == nullptr) {
GE_CHECK_NOTNULL(davinci_model);
}
uint32_t input_load = hybrid_model != nullptr ? hybrid_model->GetDataInputerSize() :
davinci_model->GetDataInputerSize();
uint32_t running_load = hybrid_model != nullptr ? static_cast<uint32_t>(hybrid_model->GetRunningFlag()) :
static_cast<uint32_t>(davinci_model->GetRunningFlag());
uint32_t load = input_load + running_load;
if (load == 0) {
return model_id;
}
model_id_to_loads.emplace_back(model_id, load);
}
sort(model_id_to_loads.begin(), model_id_to_loads.end(), CompareByLoad);
if (model_id_to_loads.empty()) {
return kInvalidModelId;
}
return model_id_to_loads.begin()->first;
}

Status GraphExecutor::SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
const RunAsyncCallback &callback) {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
if (model_manager->IsNeedHybridLoad(*ge_root_model)) {
auto model = model_manager->GetHybridModel(model_id);
GE_CHECK_NOTNULL(model);
if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
return FAILED;
}
} else {
auto model = model_manager->GetModel(model_id);
GE_CHECK_NOTNULL(model);
if (model->SetRunAsyncListenerCallback(callback) != SUCCESS) {
GELOGE(FAILED, "SetRunAsyncListenerCallback failed.");
return FAILED;
}
}
return SUCCESS;
}

Status GraphExecutor::AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &inputs,
const RunAsyncCallback &callback) {
uint32_t model_id = GetExecuteModelId(ge_root_model);
if (model_id == kInvalidModelId) {
GELOGE(INTERNAL_ERROR, "No valid model id.");
return INTERNAL_ERROR;
}
try {
auto model_manager = ge::ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
GELOGI("RunAsync begin.model_id %u", model_id);
if (SetCallback(model_id, ge_root_model, callback) != SUCCESS) {
GELOGE(FAILED, "RunAsync: SetCallBack for model fail");
return FAILED;
}

Status ret = model_manager->DataInputTensor(model_id, inputs);
if (ret != SUCCESS) {
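
GetExecuteModelId above implements the load balancing for multi-thread online inference: each loaded instance reports its queued input count plus a 0/1 running flag, an idle instance (load 0) is taken immediately, and otherwise the least-loaded one wins. A condensed, self-contained equivalent of that selection (min_element replaces the diff's sort-then-front; the pair layout follows the diff's Uint32Pair):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using Uint32Pair = std::pair<uint32_t, uint32_t>;  // {model_id, load}

uint32_t PickLeastLoaded(const std::vector<Uint32Pair> &loads) {
  if (loads.empty()) {
    return UINT32_MAX;  // kInvalidModelId in the diff
  }
  auto it = std::min_element(
      loads.begin(), loads.end(),
      [](const Uint32Pair &l, const Uint32Pair &r) { return l.second < r.second; });
  return it->first;
}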


+ 8
- 2
ge/graph/execute/graph_execute.h View File

@@ -50,7 +50,7 @@ class GraphExecutor {
std::vector<GeTensor> &output_tensor);

ge::Status ExecuteGraphAsync(GraphId graph_id, const GeRootModelPtr &ge_root_model,
const std::vector<InputTensorInfo> &input_tensor);
const std::vector<InputTensorInfo> &input_tensor, const RunAsyncCallback &callback);

Status SetCondition(std::mutex *mutex, std::condition_variable *cond, std::shared_ptr<GraphModelListener> listener);

@@ -116,6 +116,8 @@ class GraphExecutor {

static Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);

uint32_t GetExecuteModelId(const GeRootModelPtr &ge_root_model);

private:
Status PrepareInputData(const std::vector<GeTensor> &input_tensor, InputData &graph_input_data,
OutputData &graph_output_data, std::vector<InputOutputDescInfo> &output_desc);
@@ -123,7 +125,8 @@ class GraphExecutor {
Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &input_tensor,
std::vector<GeTensor> &output_tensor);

Status AsyncExecuteModel(uint32_t model_id, const std::vector<InputTensorInfo> &input_tensor);
Status AsyncExecuteModel(const GeRootModelPtr &ge_root_model, const std::vector<InputTensorInfo> &input_tensor,
const RunAsyncCallback &callback);

void InitModelIdInfo(std::vector<uint32_t> &out_model_id_info, std::vector<SubGraphInfoPtr> &sub_graph_vec,
uint32_t output_size);
@@ -132,6 +135,9 @@ class GraphExecutor {

Status MallocInOutBuffer(const std::vector<uint64_t> &buffer_size, std::vector<void *> &data_addr);

static Status SetCallback(uint32_t model_id, const GeRootModelPtr &ge_root_model,
const RunAsyncCallback &callback);

bool init_flag_;

bool train_graph_flag_;


+ 0
- 1
ge/graph/load/graph_loader.cc View File

@@ -60,7 +60,6 @@ Status GraphLoader::LoadModelOnline(uint32_t &model_id, const std::shared_ptr<ge
GELOGE(GE_GRAPH_PARAM_NULLPTR, "[LoadGraph] GE load graph model_ptr is nullptr.");
return GE_GRAPH_PARAM_NULLPTR;
}
model_id = ge_root_model_ptr->GetModelId();

auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);


+ 2
- 0
ge/graph/load/model_manager/data_inputer.h View File

@@ -134,6 +134,8 @@ class DataInputer {
///
void Stop() { queue_.Stop(); }

uint32_t Size() { return queue_.Size(); }

private:
///
/// @ingroup domi_ome


+ 96
- 35
ge/graph/load/model_manager/davinci_model.cc View File

@@ -31,6 +31,7 @@
#include "common/scope_guard.h"
#include "common/thread_pool.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
@@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
GE_CHK_STATUS(task->Release(), "Release task failed.");
}
}

for (auto &item : label_goto_args_) {
GE_FREE_RT_LOG(item.second.first);
}
label_goto_args_.clear();
}

Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
}
}

Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
std::lock_guard<std::mutex> lock(label_args_mutex_);
auto it = label_goto_args_.find(label_index);
if (it != label_goto_args_.end()) {
arg_addr = it->second.first;
arg_size = it->second.second;
return SUCCESS;
}

if (label_index >= label_list_.size()) {
GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list_[label_index]);
vector<rtLabel_t> label_used = { label_list_[label_index] };

arg_size = label_used.size() * sizeof(rtLabelDevInfo);
rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

label_goto_args_[label_index] = { arg_addr, arg_size };
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

return SUCCESS;
}

/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
@@ -2547,6 +2586,8 @@ void *DavinciModel::Run(DavinciModel *model) {

ErrorManager::GetInstance().SetStage(ErrorMessage::kModelExecute, ErrorMessage::kModelExecute);
while (model->RunFlag()) {
// Model has not truly started running before data is received
model->SetRunningFlag(false);
bool rslt_flg = true;
if (model->GetDataInputer() == nullptr) {
GELOGW("Data inputer is nullptr.");
@@ -2556,6 +2597,8 @@ void *DavinciModel::Run(DavinciModel *model) {

std::shared_ptr<InputDataWrapper> data_wrapper;
Status ret = model->GetDataInputer()->Pop(data_wrapper);
// Model run actually starts after data is received.
model->SetRunningFlag(true);
if (data_wrapper == nullptr || ret != SUCCESS) {
GELOGI("data_wrapper is null!");
continue;
@@ -2642,7 +2685,9 @@ void *DavinciModel::Run(DavinciModel *model) {

model->iterator_count_++;
model->is_first_execute_ = false;
GELOGI("run iterator count is %lu", model->iterator_count_);
// model run finished
model->SetRunningFlag(false);
GELOGI("run iterator count is %lu, model_id:%u", model->iterator_count_, model->model_id_);
}

CsaInteract::GetInstance().WriteInternalErrorCode();
@@ -2700,7 +2745,7 @@ Status DavinciModel::ModelRunStart() {

error_context_ = ErrorManager::GetInstance().GetErrorContext();
CREATE_STD_THREAD(thread_id_, DavinciModel::Run, this);
GELOGI("model tread create success, model id:%u.", model_id_);
GELOGI("model thread create success, model id:%u.", model_id_);
return SUCCESS;
}

@@ -2836,23 +2881,16 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
GELOGI("DavinciModel::UpdateKnownNodeArgs in");
GE_CHK_STATUS_RET(CreateKnownZeroCopyMap(inputs, outputs),
"DavinciModel::UpdateKnownNodeArgs create map for input/output zero copy.");
if (!base_addr_not_changed_) {
total_io_addrs_.clear();
orig_total_io_addrs_.clear();
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
auto &task = task_list_[task_index];
if (task != nullptr) {
Status ret = task->UpdateArgs();
if (ret != SUCCESS) {
GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
return FAILED;
}
total_io_addrs_.clear();
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
auto &task = task_list_[task_index];
if (task != nullptr) {
Status ret = task->UpdateArgs();
if (ret != SUCCESS) {
GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
return FAILED;
}
}
// cache latest iterator io addr
orig_total_io_addrs_ = total_io_addrs_;
} else {
total_io_addrs_ = orig_total_io_addrs_;
}
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_, false), "DavinciModel::UpdateKnownZeroCopyAddr failed.");

@@ -2892,6 +2930,14 @@ Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
return SUCCESS;
}

Status DavinciModel::CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const {
int64_t value = RT_CAPABILITY_SUPPORT;
auto rt_ret = rtGetRtCapability(featureType, featureInfo, &value);
GE_CHK_BOOL_RET_STATUS(rt_ret == RT_ERROR_NONE, FAILED, "call rtGetRtCapability failed!");
is_support = (value == RT_CAPABILITY_SUPPORT) ? true : false;
return SUCCESS;
}

Status DavinciModel::MallocKnownArgs() {
GELOGI("DavinciModel::MallocKnownArgs in");
const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
@@ -2910,20 +2956,22 @@ Status DavinciModel::MallocKnownArgs() {
return ret;
}
}
rtError_t rt_ret;
// malloc args memory
if (total_args_size_ == 0) {
GELOGW("DavinciModel::MallocKnownArgs total_args_size_ equals to zero.");
return SUCCESS;
}
bool is_support = false;
GE_CHK_STATUS_RET_NOLOG(CheckCapability(FEATURE_TYPE_MEMORY, MEMORY_INFO_TS_4G_LIMITED, is_support));
auto mem_type = is_support ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;

rtError_t rt_ret = rtMalloc(&args_, total_args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
if (total_args_size_ != 0) {
rt_ret = rtMalloc(&args_, total_args_size_, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
// malloc dynamic and static hybrid memory
if (total_hybrid_args_size_ != 0) {
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM);
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -2932,7 +2980,7 @@ Status DavinciModel::MallocKnownArgs() {
// malloc fixed addr memory, eg: rts op
if (total_fixed_addr_size_ != 0) {
GELOGI("Begin to allocate fixed addr.");
rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, RT_MEMORY_HBM);
rt_ret = rtMalloc(&fixed_addrs_, total_fixed_addr_size_, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -3025,9 +3073,8 @@ Status DavinciModel::DistributeTask() {
task_def.kernel_ex().op_index());
OpDescPtr op = GetOpByIndex(op_index);
GE_CHECK_NOTNULL(op);

if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
bool call_dump = OpNeedDump(op->GetName()) && task->CallSaveDumpInfo();
if (call_dump || is_op_debug_reg_) {
SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
}
@@ -3047,11 +3094,16 @@ Status DavinciModel::DistributeTask() {
return SUCCESS;
}

void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
bool DavinciModel::ModelNeedDump() {
auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
bool ret = all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(dump_model_name_) != all_dump_model.end() ||
all_dump_model.find(om_name_) != all_dump_model.end();
return ret;
}
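
ModelNeedDump collapses the three-way membership test that used to be inlined at several call sites into one helper. A minimal standalone sketch of the rule, assuming ge::DUMP_ALL_MODEL acts as a wildcard entry (its literal value here is assumed):

#include <set>
#include <string>

// "ALL_MODEL_NEED_DUMP" stands in for ge::DUMP_ALL_MODEL (exact value assumed).
bool NeedDump(const std::set<std::string> &dump_models,
              const std::string &dump_model_name, const std::string &om_name) {
  return dump_models.count("ALL_MODEL_NEED_DUMP") != 0 ||  // dump everything
         dump_models.count(dump_model_name) != 0 ||        // match by dump name
         dump_models.count(om_name) != 0;                  // match by om file name
}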

void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
if (ModelNeedDump()) {
GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
data_dumper_.SaveEndGraphId(task_id, stream_id);
}
@@ -3851,7 +3903,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)
}

void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &graph, const map<string, OpDescPtr> &variable_by_name) {
data_dumper_.SetModelName(name_);
if (dump_model_name_.empty()) {
dump_model_name_ = name_;
}
data_dumper_.SetModelName(dump_model_name_);
data_dumper_.SetModelId(model_id_);
data_dumper_.SetOmName(om_name_);
data_dumper_.SetComputeGraph(graph);
@@ -4040,7 +4095,7 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
Status DavinciModel::InitL1DataDumperArgs() {
auto all_dump_model = GetDumpProperties().GetAllDumpModel();
bool find_by_om_name = all_dump_model.find(om_name_) != all_dump_model.end();
bool find_by_model_name = all_dump_model.find(name_) != all_dump_model.end();
bool find_by_model_name = all_dump_model.find(dump_model_name_) != all_dump_model.end();
bool dump_l1fusion_op =
(all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end()) || find_by_om_name || find_by_model_name;
if (dump_l1fusion_op) {
@@ -4061,4 +4116,10 @@ Status DavinciModel::InitL1DataDumperArgs() {
return SUCCESS;
}

Status DavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
GE_CHECK_NOTNULL(listener);
listener->SetCallback(callback);
return SUCCESS;
}
} // namespace ge
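
SetRunAsyncListenerCallback above rebinds the per-run callback on a listener stored through its base class. A hedged standalone sketch of that downcast-and-bind step, with hypothetical stand-in types in place of ModelListener/RunAsyncListener:

#include <cstdint>
#include <functional>
#include <memory>
#include <utility>

struct ModelListenerSketch { virtual ~ModelListenerSketch() = default; };

struct RunAsyncListenerSketch : ModelListenerSketch {
  void SetCallback(std::function<void(uint32_t)> cb) { callback_ = std::move(cb); }
  std::function<void(uint32_t)> callback_;
};

// Mirrors the dynamic_cast + GE_CHECK_NOTNULL sequence: fail cleanly when the
// stored listener is not the async flavor.
bool BindRunAsyncCallback(const std::shared_ptr<ModelListenerSketch> &listener,
                          std::function<void(uint32_t)> cb) {
  auto *async_listener = dynamic_cast<RunAsyncListenerSketch *>(listener.get());
  if (async_listener == nullptr) {
    return false;
  }
  async_listener->SetCallback(std::move(cb));
  return true;
}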

ge/graph/load/model_manager/davinci_model.h (+29, -4)

@@ -221,6 +221,11 @@ class DavinciModel {
///
DataInputer *const GetDataInputer() const { return data_inputer_; }

uint32_t GetDataInputerSize() {
GE_CHECK_NOTNULL(data_inputer_);
return data_inputer_->Size();
}

// get Stream number
uint32_t StreamNum() const { return runtime_param_.stream_num; }

@@ -248,7 +253,10 @@ class DavinciModel {
string Name() const { return name_; }

// om_name
string OmName() const { return om_name_; }
const string &OmName() const { return om_name_; }

// dump_model_name
const string &DumpModelName() const { return dump_model_name_; }

// version
uint32_t Version() const { return version_; }
@@ -273,6 +281,8 @@ class DavinciModel {

const vector<rtLabel_t> &GetLabelList() const { return label_list_; }

Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);

Status DestroyThread();

// get Op
@@ -481,6 +491,12 @@ class DavinciModel {
data_dumper_.DumpShrink();
}

bool OpNeedDump(const string &op_name) {
return GetDumpProperties().IsLayerNeedDump(dump_model_name_, om_name_, op_name);
}

bool ModelNeedDump();

void SetEndGraphId(uint32_t task_id, uint32_t stream_id);
DavinciModel &operator=(const DavinciModel &model) = delete;

@@ -528,11 +544,11 @@ class DavinciModel {
}
void SetKnownNode(bool known_node) { known_node_ = known_node; }
bool IsKnownNode() { return known_node_; }
Status CheckCapability(rtFeatureType_t featureType, int32_t featureInfo, bool &is_support) const;
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs, bool update_args = true);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info) const;
Status GetAllAippInputOutputDims(uint32_t index, vector<InputOutputDims> &input_dims,
@@ -540,6 +556,7 @@ class DavinciModel {

// om file name
void SetOmName(const string &om_name) { om_name_ = om_name; }
void SetDumpModelName(const string &dump_model_name) { dump_model_name_ = dump_model_name; }

void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }
const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
@@ -548,6 +565,10 @@ class DavinciModel {
return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
}

bool GetRunningFlag() const { return running_flg_; }
void SetRunningFlag(bool flag) { running_flg_ = flag; }
Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);

private:
// memory address of weights
uint8_t *weights_mem_base_;
@@ -886,6 +907,7 @@ class DavinciModel {

// used for inference data dump
string om_name_;
string dump_model_name_;

uint32_t version_;
GeModelPtr ge_model_; // release after DavinciModel::Init
@@ -911,6 +933,8 @@ class DavinciModel {
shared_ptr<ModelListener> listener_;

bool run_flg_;
// check whether model is running with data
bool running_flg_ = false;

mutex mux_run_flg_;

@@ -930,6 +954,9 @@ class DavinciModel {
vector<rtLabel_t> label_list_;
set<uint32_t> label_id_indication_;

mutex label_args_mutex_;
map<uint32_t, pair<void *, uint32_t>> label_goto_args_;

mutex outside_addrs_mutex_;
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
set<const void *> copy_only_addrs_; // Address need copy to original place.
@@ -1002,8 +1029,6 @@ class DavinciModel {
map<const void *, void *> known_input_data_info_;
map<const void *, void *> known_output_data_info_;
vector<void *> total_io_addrs_;
vector<void *> orig_total_io_addrs_;
bool base_addr_not_changed_ = false;

vector<vector<int64_t>> batch_info_;
vector<vector<int64_t>> combined_batch_info_;


ge/graph/load/model_manager/model_manager.cc (+37, -19)

@@ -271,7 +271,7 @@ ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector<uin
return SUCCESS;
}

ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &model_name,
ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string &om_name,
const shared_ptr<ge::GeRootModel> &ge_root_model,
const shared_ptr<ModelListener> &listener) {
auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model);
@@ -279,13 +279,24 @@ ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const string
hybrid_model->SetListener(listener);
hybrid_model->SetModelId(model_id);
hybrid_model->SetDeviceId(GetContext().DeviceId());
hybrid_model->SetModelName(model_name);
hybrid_model->SetOmName(om_name);
GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. model_id = %u", model_id);
auto shared_model = std::shared_ptr<hybrid::HybridDavinciModel>(hybrid_model.release());
InsertModel(model_id, shared_model);
return SUCCESS;
}

bool ModelManager::IsNeedHybridLoad(ge::GeRootModel &ge_root_model) {
auto root_graph = ge_root_model.GetRootGraph();
if (root_graph == nullptr) {
GELOGE(FAILED, "no model on root model");
return false;
}
bool is_shape_unknown = root_graph->GetGraphUnknownFlag();
bool is_dsp_partitioned_graph = false;
(void)AttrUtils::GetBool(root_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dsp_partitioned_graph);
return is_shape_unknown || is_dsp_partitioned_graph || GetContext().GetHostExecFlag();
}
///
/// @ingroup domi_ome
/// @brief load model online
@@ -296,21 +307,17 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null");
if (model_id == INVALID_MODEL_ID) {
GenModelId(&model_id);
GELOGD("Generate new model_id:%u", model_id);
}
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
string model_name = "";
bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag();
// if multi subgraph is known, do hybrid load process
if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) {
return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
string om_name;
if (IsNeedHybridLoad(*ge_root_model)) {
return DoLoadHybridModelOnline(model_id, om_name, ge_root_model, listener);
}

mmTimespec timespec = mmGetTickCount();
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, listener);
if (davinci_model == nullptr) {
GELOGE(FAILED, "davinci_model is nullptr");
return FAILED;
}
GE_CHECK_NOTNULL(davinci_model);
davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * kTimeSpecNano +
timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond
davinci_model->SetId(model_id);
@@ -330,7 +337,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed.");
break;);
GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign");

/// In multi-threaded inference, sharing one session_id among multiple threads may cause some threads to fail.
/// These session_ids come from the same model, so their values are identical.
/// Update the session_id during model load so each inference load gets a distinct one.
if (!ge_root_model->GetTrainFlag()) {
uint64_t new_session_id;
ret = GenSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed.");
ret = davinci_model->UpdateSessionId(new_session_id);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed.");
ge_model->InsertSessionMap(model_id, new_session_id);
GELOGD("Update new session id: %lu.", new_session_id);
}
GE_TIMESTAMP_START(Init);
GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;);
GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit");
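
The session-id hunk above gives each inference load its own id so concurrent threads never collide on session-keyed state. A minimal sketch of a collision-free generator, assuming GenSessionId is essentially a process-wide counter (the name below is hypothetical):

#include <atomic>
#include <cstdint>

// Hypothetical stand-in for GenSessionId(): an atomic counter guarantees that
// two models loaded from the same om file still get distinct session ids.
uint64_t NextSessionId() {
  static std::atomic<uint64_t> next_id{1};
  return next_id.fetch_add(1, std::memory_order_relaxed);
}
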
@@ -343,16 +361,16 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
return ret;
}

void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
model_map_[model_id] = davinci_model;
}

void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
void ModelManager::InsertModel(uint32_t model_id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
hybrid_model_map_[model_id] = hybrid_model;
}

Status ModelManager::DeleteModel(uint32_t id) {
@@ -1705,7 +1723,7 @@ Status ModelManager::CheckAicpuOpList(GeModelPtr ge_model) {
bool aicpu_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckCpu", aicpu_optype_list);
bool tf_need_check = ge::AttrUtils::GetListStr(ge_model, "needCheckTf", aicpu_tf_optype_list);
if (!aicpu_need_check && !tf_need_check) {
GELOGI("Graph:%s No need to check aicpu optype.", ge_model->GetGraph().GetName().c_str());
GELOGI("No need to check aicpu optype for graph.");
return SUCCESS;
}
GE_CHK_STATUS_RET(LaunchKernelCheckAicpuOp(aicpu_optype_list, aicpu_tf_optype_list),


ge/graph/load/model_manager/model_manager.h (+3, -2)

@@ -294,6 +294,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::vector<InputOutputDims> &output_dims);

bool IsDynamicShape(uint32_t model_id);
bool IsNeedHybridLoad(ge::GeRootModel &ge_root_model);
ge::Status GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info);

ge::Status EnableExceptionDump(const std::map<string, string> &options);
@@ -329,8 +330,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
/// @ingroup domi_ome
/// @brief insert new model into model manager set
///
void InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model);
void InsertModel(uint32_t id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);
void InsertModel(uint32_t model_id, std::shared_ptr<DavinciModel> &davinci_model);
void InsertModel(uint32_t model_id, std::shared_ptr<hybrid::HybridDavinciModel> &hybrid_model);

///
/// @ingroup domi_ome


ge/graph/load/model_manager/model_utils.cc (+2, -1)

@@ -384,7 +384,8 @@ Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDesc
switch (mem_type) {
case RT_MEMORY_RDMA_HBM:
if (offset < 0) {
GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset));
GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p",
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset)));
return PARAM_INVALID;
}
var_addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));


ge/graph/load/model_manager/task_info/end_graph_task_info.cc (+1, -4)

@@ -45,10 +45,7 @@ Status EndGraphTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
Status EndGraphTaskInfo::Distribute() {
GELOGI("EndGraphTaskInfo Distribute Start.");
GE_CHECK_NOTNULL(davinci_model_);
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
if (davinci_model_->ModelNeedDump()) {
GELOGI("Start to call rtEndGraphEx");
rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
if (rt_ret != RT_ERROR_NONE) {


ge/graph/load/model_manager/task_info/kernel_ex_task_info.cc (+1, -2)

@@ -238,8 +238,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
}

void KernelExTaskInfo::InitDumpTask(void *addr, const OpDescPtr &op_desc) {
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc->GetName())) {
if (davinci_model_->OpNeedDump(op_desc->GetName())) {
dump_flag_ = RT_KERNEL_DUMPFLAG;
dump_args_ = addr;
}


ge/graph/load/model_manager/task_info/kernel_task_info.cc (+74, -49)

@@ -124,7 +124,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
return FAILED;
}

ret = InitTVMTask(args_offset_tmp[0], kernel_def);
io_addr_offset_ = args_offset_tmp[0];
ret = InitTVMTask(io_addr_offset_, kernel_def);
} else if (kernel_type_ == ccKernelType::CUSTOMIZED) {
ret = InitAICPUCustomTask(context.op_index(), kernel_def);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
@@ -380,7 +381,8 @@ Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
: davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
@@ -407,10 +409,7 @@ Status KernelTaskInfo::Distribute() {
call_skt, task_id_, skt_id_, skt_info.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
// l1 fusion enable and env flag open (kCloseSkt for skt debug)
bool open_dump = false;
auto all_dump_model = davinci_model_->GetDumpProperties().GetAllDumpModel();
if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
if (davinci_model_->ModelNeedDump()) {
open_dump = true;
}
if (call_skt && (env_flag != kCloseSkt) && !open_dump) {
@@ -449,29 +448,41 @@ void KernelTaskInfo::SetIoAddrs(const OpDescPtr &op_desc) {
}
}

Status KernelTaskInfo::CopyNoncontinuousArgs(uint16_t offset) {
GE_CHECK_NOTNULL(davinci_model_);
// copy new io addrs
vector<void *> io_addrs = io_addrs_;
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
auto addr_size = kAddrLen * io_addrs.size();

// copy io addr
errno_t sec_ret = memcpy_s(args_addr.get() + offset, addr_size, io_addrs.data(), addr_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}

// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("Copy noncontinuous args success, kernel type %d.", kernel_type_);
return SUCCESS;
}

Status KernelTaskInfo::UpdateArgs() {
GELOGI("KernelTaskInfo::UpdateArgs in.");
GE_CHECK_NOTNULL(davinci_model_);
if (kernel_type_ == ccKernelType::TE) {
if (l2_buffer_on_) {
return CopyNoncontinuousArgs(io_addr_offset_);
}
davinci_model_->SetTotalIOAddrs(io_addrs_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
vector<void *> io_addrs = io_addrs_;
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return CopyNoncontinuousArgs(sizeof(aicpu::AicpuParamHead));
}

GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
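
Both the TE l2-buffer path and the AI_CPU path of UpdateArgs now funnel into CopyNoncontinuousArgs: patch the io addresses into a host staging copy of the args blob at a type-specific offset, then push the whole blob to device in one copy. A bounds-checked sketch of that staging pattern, with a stand-in callback in place of rtMemcpy:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// push_to_device stands in for rtMemcpy(..., RT_MEMCPY_HOST_TO_DEVICE).
bool StageAndPushArgs(uint8_t *host_args, size_t args_size, size_t offset,
                      const std::vector<void *> &io_addrs,
                      bool (*push_to_device)(const uint8_t *, size_t)) {
  const size_t addrs_bytes = io_addrs.size() * sizeof(void *);
  if (offset > args_size || args_size - offset < addrs_bytes) {
    return false;  // same guard the TVM path applies before memcpy_s
  }
  std::memcpy(host_args + offset, io_addrs.data(), addrs_bytes);
  return push_to_device(host_args, args_size);  // one host-to-device copy
}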

@@ -516,8 +527,8 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
return SUCCESS;
}

char *sm_contrl = const_cast<char *>(sm_desc.data());
rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_contrl);
char *sm_control = const_cast<char *>(sm_desc.data());
rtL2Ctrl_t *l2_ctrl_info = reinterpret_cast<rtL2Ctrl_t *>(sm_control);
uint64_t gen_base_addr = davinci_model_->GetRtBaseAddr();

// There is no weight for te op now. Update L2_mirror_addr by data memory base.
@@ -545,19 +556,31 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
return SUCCESS;
}

void KernelTaskInfo::SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model) {
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
}

void KernelTaskInfo::SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(args_size);
}

Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GE_CHECK_NOTNULL(davinci_model);
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
uint32_t args_size = kernel_def.args_size();
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
if (kernel_def.sm_desc().empty()) {
SetContinuousArgs(args_size, davinci_model);
return SUCCESS;
}
l2_buffer_on_ = true;
SetNoncontinuousArgs(args_size, davinci_model);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
SetNoncontinuousArgs(args_size, davinci_model);
}
return SUCCESS;
}
@@ -568,8 +591,23 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
// get tvm op desc
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
GE_CHECK_NOTNULL(op_desc);

args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}

Status ge_ret = UpdateL2Data(kernel_def);
// update origin l2 data
if (ge_ret != SUCCESS) {
return ge_ret;
}

if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
args_ = l2_buffer_on_ ? davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_)
: davinci_model_->GetCurrentArgsAddr(args_offset_);
InitDumpTask(offset);
return SUCCESS;
}
@@ -609,12 +647,6 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
vector<uint8_t> args_info(args_size_);
errno_t sec_ret = memcpy_s(args_info.data(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}

if ((args_size_ <= offset) || (args_size_ - offset < kAddrLen * tensor_device_addrs.size())) {
GELOGE(FAILED, "offset >= kernelInfo.argsSize or copy content beyond applied memory.");
@@ -628,7 +660,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
sec_ret = memcpy_s(args_info.data() + offset, args_size_ - offset, tensor_device_addrs.data(),
sec_ret = memcpy_s(args_addr.get() + offset, args_size_ - offset, tensor_device_addrs.data(),
kAddrLen * tensor_device_addrs.size());
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
@@ -640,19 +672,13 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GE_CHK_BOOL_TRUE_EXEC_INFO(davinci_model_->GetOpDugReg(), dump_args_ = static_cast<char *>(args_) + offset,
"Op debug is open in TVM task info");

Status ge_ret = UpdateL2Data(kernel_def);
// update origin l2 data
if (ge_ret != SUCCESS) {
return ge_ret;
}

vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (op_desc->GetType() == ATOMICADDRCLEAN) {
virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset);
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_addr.get(), args_, args_size_, offset);

GELOGD("Do InitTVMTask end");
return SUCCESS;
@@ -951,8 +977,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}

void KernelTaskInfo::InitDumpTask(uint32_t offset) {
if (davinci_model_->GetDumpProperties().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
op_desc_->GetName())) {
if (davinci_model_->OpNeedDump(op_desc_->GetName())) {
if (IsL1FusionOp(op_desc_)) {
dump_flag_ = RT_FUSION_KERNEL_DUMPFLAG;
} else {


ge/graph/load/model_manager/task_info/kernel_task_info.h (+5, -0)

@@ -129,6 +129,9 @@ class KernelTaskInfo : public TaskInfo {
bool IsL1FusionOp(const OpDescPtr &op_desc);
void SetIoAddrs(const OpDescPtr &op_desc);
void InitDumpTask(uint32_t offset);
void SetContinuousArgs(uint32_t args_size, DavinciModel *davinci_model);
void SetNoncontinuousArgs(uint32_t args_size, DavinciModel *davinci_model);
Status CopyNoncontinuousArgs(uint16_t offset);

// For super kernel
Status SaveSKTDumpInfo();
@@ -163,6 +166,8 @@ class KernelTaskInfo : public TaskInfo {
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
uint16_t io_addr_offset_ = 0;
bool l2_buffer_on_ = false;
bool call_save_dump_ = false;

// aicpu ext_info device mem


ge/graph/load/model_manager/task_info/label_goto_ex_task_info.cc (+4, -22)

@@ -22,7 +22,7 @@ namespace ge {
constexpr uint8_t kGotoBranchMax = 1;

LabelGotoExTaskInfo::~LabelGotoExTaskInfo() {
GE_FREE_RT_LOG(args_);
args_ = nullptr;
GE_FREE_RT_LOG(index_value_);
}

@@ -49,30 +49,12 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return INTERNAL_ERROR;
}

const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
if (label_index >= label_list.size()) {
GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size());
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list[label_index]);
vector<rtLabel_t> label_used = { label_list[label_index] };

rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
GELOGI("memory_type: %u", memory_type);
args_size_ = kGotoBranchMax * sizeof(rtLabelDevInfo);
rtError_t rt_ret = rtMalloc(&args_, args_size_, memory_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_));

rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -85,7 +67,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return RT_ERROR_TO_GE_STATUS(rt_ret);
}

GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_list[label_index]);
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index);
return SUCCESS;
}



ge/graph/manager/graph_caching_allocator.cc (+8, -0)

@@ -356,6 +356,14 @@ void CachingAllocator::FreeBlocks() {
(void) FreeCachedBlocks();
}

void CachingAllocator::TryFreeBlocks() {
GELOGI("Try free blocks.");
std::lock_guard<std::recursive_mutex> lock(mutex_);
if (allocated_blocks_.empty()) {
(void) FreeCachedBlocks();
}
}
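
TryFreeBlocks only drops the cache when no allocation is outstanding, and it takes the same lock as the allocation path, so the emptiness check and the release are atomic with respect to concurrent Malloc/Free. A compact sketch of that guard with stand-in members:

#include <cstddef>
#include <map>
#include <mutex>

class CachingAllocatorSketch {
 public:
  void TryFreeBlocks() {
    std::lock_guard<std::recursive_mutex> lock(mutex_);
    if (allocated_blocks_.empty()) {  // nothing live -> cache is safe to drop
      FreeCachedBlocks();
    }
  }

 private:
  void FreeCachedBlocks() { /* release cached device blocks (stand-in) */ }
  std::recursive_mutex mutex_;
  std::map<void *, size_t> allocated_blocks_;  // live allocations by address
};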

void CachingAllocator::FreeBlockBins() {
GELOGI("Free block bins.");
std::lock_guard<std::recursive_mutex> lock(mutex_);


ge/graph/manager/graph_caching_allocator.h (+7, -0)

@@ -94,6 +94,13 @@ class CachingAllocator {
///
Status Free(uint8_t *memory_addr, uint32_t device_id = 0);

///
/// @ingroup ge_graph
/// @brief try to free memory when no memory is referenced
/// @return void
///
void TryFreeBlocks();

private:

///


ge/graph/manager/graph_manager.cc (+359, -147)

@@ -117,6 +117,11 @@ const char *const kAIcoreEngine = "AIcoreEngine";
const int32_t kDynamicDimsTypeIsGetNext = 0;
const int32_t kDynamicDimsTypeIsData = 1;
const char *const kGetNextName = "IteratorV2";
const uint32_t kInitGraphCount = 1;
const uint32_t kNotAdded = 0;
const uint32_t kStartAdd = 1;
const uint32_t kDoneAdded = 2;
const uint32_t kNeverLoaded = 0;

bool IsTailingOptimization() {
string is_tailing_optimization_option;
@@ -195,6 +200,8 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {

graph_map_.clear();
cache_helper_map_.clear();
graph_id_to_add_graph_cond_.clear();
graph_count_.clear();
init_flag_ = true;

thread_run_flag_ = true;
@@ -204,6 +211,20 @@ Status GraphManager::Initialize(const std::map<string, string> &options) {
return SUCCESS;
}

Status GraphManager::UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id) {
Status ret = SUCCESS;
for (size_t i = 0; i < ge_root_model->GetAllModelId().size(); ++i) {
uint32_t model_id = ge_root_model->GetAllModelId()[i];
GELOGI("Unload model %u.", model_id);
ret = GraphLoader::UnloadModel(model_id);
if (ret != SUCCESS) {
GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
return ret;
}
}
return ret;
}

Status GraphManager::Finalize() {
if (!init_flag_) {
GELOGW("GraphManager has not been initialized.");
@@ -234,7 +255,6 @@ Status GraphManager::Finalize() {
unload_model_ret = GE_GRAPH_GRAPH_IS_RUNNING;
continue;
}

// unload model
auto ge_root_model = graph_node->GetGeRootModel();
if (ge_root_model != nullptr && ge_root_model->GetModelId() != INVALID_MODEL_ID && graph_node->GetLoadFlag()) {
@@ -244,15 +264,14 @@ Status GraphManager::Finalize() {
unload_model_ret = FAILED;
continue;
}
ret = GraphLoader::UnloadModel(ge_root_model->GetModelId());
ret = UnloadModel(ge_root_model, iter->first);
if (ret != SUCCESS) {
GELOGW("[GraphManager] unload model failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(), iter->first);
GELOGW("[GraphManager] unload model failed, graph_id=%u.", iter->first);
unload_model_ret = ret;
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("[GraphManager] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
iter->first);
GELOGW("[GraphManager] rtDeviceReset failed, graphId=%u.", iter->first);
unload_model_ret = FAILED;
continue;
}
@@ -267,6 +286,7 @@ Status GraphManager::Finalize() {
}
graph_map_.clear();
cache_helper_map_.clear();
graph_count_.clear();

// graph context
if (graph_context_ != nullptr) {
@@ -317,30 +337,59 @@ Status GraphManager::InitDynamicParams(ComputeGraphPtr &compute_graph) {
return SUCCESS;
}

Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
const std::map<std::string, std::string> &options,
const OmgContext &omg_context) {
if (HasGraphNode(graph_id)) {
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST, "[GraphManager] graph exists, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
void GraphManager::SetAddGraphCondition(GraphId graph_id, uint32_t cond) {
std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
graph_id_to_add_graph_cond_[graph_id] = cond;
GELOGD("Graph [id:%u] has been added.", graph_id);
}

uint32_t GraphManager::GetAddGraphCondition(GraphId graph_id) {
std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
auto it = graph_id_to_add_graph_cond_.find(graph_id);
if (it != graph_id_to_add_graph_cond_.end()) {
return it->second;
} else {
GELOGD("Graph [id:%u] has not been added.", graph_id);
return kNotAdded;
}
}

auto compute_graph = GraphUtils::GetComputeGraph(graph);
if (compute_graph != nullptr) {
compute_graph->SetGraphID(graph_id);
bool graph_has_been_added = false;
if (AttrUtils::GetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, graph_has_been_added)
&& graph_has_been_added) {
GELOGE(GE_GRAPH_GRAPH_ALREADY_EXIST,
"[GraphManager] same graph object can not be added again, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_ALREADY_EXIST;
}
(void)AttrUtils::SetBool(*compute_graph, ATTR_NAME_GRAPH_HAS_BEEN_ADDED, true);
compute_graph_ = compute_graph;
void GraphManager::RemoveAddGraphCondition(GraphId graph_id) {
std::lock_guard<std::mutex> lock(add_graph_cond_mutex_);
auto it = graph_id_to_add_graph_cond_.find(graph_id);
if (it != graph_id_to_add_graph_cond_.end()) {
graph_id_to_add_graph_cond_.erase(it);
GELOGD("Successfully removed add_graph_cond of graph [id:%u].", graph_id);
} else {
GELOGE(FAILED, "compute graph is null");
return FAILED;
GELOGD("Graph [id:%u] has not been added. no need to remove.", graph_id);
}
}

Status GraphManager::CheckRepeatAdd(uint32_t graph_id, bool &is_added) {
uint32_t count = 0;
if (GetGraphCount(graph_id, count) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id);
return INTERNAL_ERROR;
}
// a previous thread with the same graph_id is in the middle of the AddGraph process
if (count > 1 && GetAddGraphCondition(graph_id) == kStartAdd) {
std::unique_lock<std::mutex> lock(add_graph_mutex_);
GELOGD("Waitting for build end of previous thread.");
while (GetAddGraphCondition(graph_id) != kDoneAdded) {
add_graph_cv_.wait(lock);
}
GraphNodePtr graph_node;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
GELOGE(ret, "[AddGraph] GetGraphNode failed, graph_id = %u.", graph_id);
return ret;
}
is_added = true;
}
return SUCCESS;
}
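
CheckRepeatAdd is a classic condition-variable rendezvous: follower threads sleep until the first thread flips the shared state to kDoneAdded, and the predicate is re-evaluated on every wakeup so spurious wakeups and notify_all broadcasts are harmless. A generic sketch of the same handshake (the real code keeps the state behind its own accessor; the globals below are illustrative only):

#include <condition_variable>
#include <mutex>

enum AddCondSketch { kNotAddedSketch, kStartAddSketch, kDoneAddedSketch };

std::mutex g_add_mutex;
std::condition_variable g_add_cv;
AddCondSketch g_cond = kNotAddedSketch;

void WaitUntilAdded() {
  std::unique_lock<std::mutex> lock(g_add_mutex);
  // wait() re-checks the predicate on every wakeup before returning.
  g_add_cv.wait(lock, [] { return g_cond == kDoneAddedSketch; });
}

void MarkDoneAndNotify() {
  {
    std::lock_guard<std::mutex> lock(g_add_mutex);
    g_cond = kDoneAddedSketch;
  }
  g_add_cv.notify_all();  // wake every thread parked in WaitUntilAdded()
}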

void GraphManager::SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id) {
std::string session_graph_id;
if (!AttrUtils::GetStr(*compute_graph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id) || session_graph_id.empty()) {
session_graph_id = "-1_" + to_string(graph_id);
@@ -352,7 +401,24 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
}
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
}
}

Status GraphManager::NotifyWaittingGraph(uint32_t graph_id) {
uint32_t count = 0;
if (GetGraphCount(graph_id, count) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed, graph might have not been added.", graph_id);
return INTERNAL_ERROR;
}
GELOGD("Add graph finished, graph_id:%u", graph_id);
if (count > 1) {
GELOGD("Finish addgraph, graph_id:%u, graph_count:%u, start to notify.", graph_id, count);
add_graph_cv_.notify_all();
}
return SUCCESS;
}

Status GraphManager::CreateGraphNode(uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
GE_IF_BOOL_EXEC(graph_node == nullptr, GELOGE(FAILED, "GraphNode make shared failed");
return FAILED);
@@ -365,7 +431,62 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
ParseOption(options, TUNING_PATH, options_.tuning_path);
graph_node->SetGraph(graph_ptr);
graph_node->SetOptions(options);
graph_node->IncreaseLoadCount();
AddGraphNode(graph_id, graph_node);
return SUCCESS;
}

Status GraphManager::SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options) {
CompilerStages &stages = GetCompilerStages(graph_id);
stages.preparer.SetOptions(options_);
Status status = stages.optimizer.SetOptions(options_);
if (status != SUCCESS) {
GELOGE(status, "Graph optimizer set options failed.");
return status;
}
stages.builder.SetOptions(options_);
return SUCCESS;
}

Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
const std::map<std::string, std::string> &options,
const OmgContext &omg_context) {
IncreaseGraphCount(graph_id);
// validation for adding graphs with the same graph_id in multi-thread scenarios
// 1. a previous thread with the same graph_id has finished the AddGraph process
if (GetAddGraphCondition(graph_id) == kDoneAdded) {
GraphNodePtr graph_node;
if (GetGraphNode(graph_id, graph_node) != SUCCESS) {
GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "Graph not exist while done adding previously, graph_id = %u.", graph_id);
return GE_GRAPH_GRAPH_NOT_EXIST;
}
graph_node->IncreaseLoadCount();
return SUCCESS;
}
// In multi-thread scenarios, a former thread with the same graph_id may be
// in the middle of the AddGraph process; following threads have to wait until
// the former thread finishes adding the graph, to avoid repeatedly adding the same graph.
bool is_added = false;
if (CheckRepeatAdd(graph_id, is_added) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "CheckRepeatAdd for graph[id:%u] failed.", graph_id);
return INTERNAL_ERROR;
}
// The former graph (from a different thread) with the same graph_id has been successfully added.
if (is_added) {
return SUCCESS;
}
// Do add graph
SetAddGraphCondition(graph_id, kStartAdd);
auto compute_graph = GraphUtils::GetComputeGraph(graph);
GE_CHECK_NOTNULL(compute_graph);
compute_graph->SetGraphID(graph_id);

SetSessionGraphId(compute_graph, graph_id);

if (CreateGraphNode(graph_id, graph, options) != SUCCESS) {
GELOGE(FAILED, "Failed to create graph_node.");
return FAILED;
}

AddLocalOmgContext(graph_id, omg_context);
if (!options_.output_datatype.empty()) {
@@ -376,16 +497,18 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
return GRAPH_PARAM_INVALID;
}

CompilerStages &stages = GetCompilerStages(graph_id);
stages.preparer.SetOptions(options_);
Status status = stages.optimizer.SetOptions(options_);
if (status != SUCCESS) {
GELOGE(status, "Graph optimizer set options failed.");
return status;
if (SetStagesOptions(graph_id, options_) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Set stage options failed.");
return INTERNAL_ERROR;
}
stages.builder.SetOptions(options_);

var_acc_ctrl_.AddGraph(graph_id, compute_graph);
SetAddGraphCondition(graph_id, kDoneAdded);
// There may be threads waiting to add the same graph
if (NotifyWaittingGraph(graph_id) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "NotifyWaittingGraph failed.");
return INTERNAL_ERROR;
}
return SUCCESS;
}

@@ -695,6 +818,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node,
GM_RUN_AND_DUMP_PERF("OptimizeSwitchOp", stages.preparer.SwitchOpOptimize, compute_graph);
}
GM_RUN_AND_DUMP_PERF("Optimize1", OptimizeStage1, compute_graph);
GM_RUN_AND_DUMP_PERF("OptimizeAfterStage1", stages.optimizer.OptimizeAfterStage1, compute_graph);
GM_RUN_AND_DUMP_PERF("InferShape2", compute_graph->InferShapeInNeed);

PassManager graph_pass;
@@ -895,6 +1019,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model, graph_node);
} else {
GE_CHECK_NOTNULL(ge_root_model);
ret = LoadGraphAsync(ge_root_model, graph_node);
}
if (ret != SUCCESS) {
@@ -909,6 +1034,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
if (!graph_node->IsAsync()) {
ret = LoadGraph(ge_root_model_ptr, graph_node);
} else {
GE_CHECK_NOTNULL(ge_root_model);
ret = LoadGraphAsync(ge_root_model_ptr, graph_node);
}
if (ret != SUCCESS) {
@@ -921,6 +1047,7 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
Status GraphManager::LoadGraph(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) {
GELOGI("[LoadGraph] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId());
if (options_.run_graph_flag && ge_root_model != nullptr) {
ge_root_model->SetTrainFlag(GetTrainFlag());
// synchronization run graph with model
std::shared_ptr<GraphModelListener> model_listener = GetModelListener();
ModelIdInfo model_id_info;
@@ -1315,54 +1442,29 @@ bool GraphManager::CheckModelLoad(const GeRootModelPtr &ge_root_model, bool load
}

Status GraphManager::RemoveGraph(const GraphId &graph_id) {
auto it = to_be_deleted_graphs_.find(graph_id);
if (it != to_be_deleted_graphs_.end()) {
to_be_deleted_graphs_.erase(it);
}
GraphNodePtr graph_node = nullptr;
Status ret = GetGraphNode(graph_id, graph_node);
if (ret != SUCCESS) {
if (ret != SUCCESS || graph_node == nullptr) {
REPORT_INNER_ERROR("E19999", "Graph:%u not exist in graph_map, check invalid when GraphManager %s",
graph_id, __FUNCTION__);
GELOGE(GE_GRAPH_GRAPH_NOT_EXIST, "[GraphManager] Id %u does not exists.", graph_id);
return GE_GRAPH_GRAPH_NOT_EXIST;
}

if ((graph_node == nullptr) || (graph_node->GetRunFlag())) {
GELOGE(GE_GRAPH_GRAPH_IS_RUNNING, "[GraphManager] Id %u is running, can't be deleted.", graph_id);
return GE_GRAPH_GRAPH_IS_RUNNING;
if (graph_node->GetRunFlag()) {
// only put the graph into the to-be-deleted list in exceptional scenarios
to_be_deleted_graphs_.insert(graph_id);
GELOGI("[GraphManager] Trying to remove running graph[Id:%u], added into to_be_deleted_graphs_.", graph_id);
return SUCCESS;
}

std::lock_guard<std::mutex> lock(unload_model_mutex_);

Status middle_ret;
rtError_t rt_ret;
const std::vector<SubGraphInfoPtr> &all_sub_graph = graph_node->GetAllSubGraph();
for (size_t i = 0; i < all_sub_graph.size(); ++i) {
// must free buffer firstly
middle_ret = all_sub_graph[i]->FreeInOutBuffer();
if (middle_ret != SUCCESS) {
GELOGE(middle_ret, "[GraphManager] RemoveGraph free mem failed, graph_id=%u.", graph_id);
ret = middle_ret;
}
if (all_sub_graph[i]->GeModelIsValid() && all_sub_graph[i]->GetModelIdInfo().model_id != INVALID_MODEL_ID) {
// unload model
GELOGI("UnloadModel via new ome.");
rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.",
all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
ret = FAILED;
continue;
}
middle_ret = GraphLoader::UnloadModel(all_sub_graph[i]->GetModelIdInfo().model_id);
if (middle_ret != SUCCESS) {
GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.",
all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
ret = middle_ret;
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] unload model failed, modelId=%u, graphId=%u.",
all_sub_graph[i]->GetModelIdInfo().model_id, graph_id);
ret = FAILED;
}
}
}
var_acc_ctrl_.RemoveGraph(graph_id);
RemoveGraphNode(graph_id);

@@ -1370,28 +1472,33 @@ Status GraphManager::RemoveGraph(const GraphId &graph_id) {

auto ge_root_model = graph_node->GetGeRootModel();
if (CheckModelLoad(ge_root_model, graph_node->GetLoadFlag())) {
GELOGI("Unload model %u.", ge_root_model->GetModelId());
rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
graph_id);
return FAILED;
}
middle_ret = GraphLoader::UnloadModel(ge_root_model->GetModelId());
// same graph may be added for several times, different models were created separately,
// unload them respectively.
middle_ret = UnloadModel(ge_root_model, graph_id);
if (middle_ret != SUCCESS) {
GELOGE(middle_ret, "[GraphManager:] unload model failed, modelId=%u, graph_id=%u.", ge_root_model->GetModelId(),
graph_id);
REPORT_INNER_ERROR("E19999", "UnloadModel for graph:%u failed, check unload detail in GraphLoader %s",
graph_id, __FUNCTION__);
GELOGE(middle_ret, "[GraphManager:] unload model failed, graph_id=%u.", graph_id);
ret = middle_ret;
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", ge_root_model->GetModelId(),
graph_id);
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, graph_id:%u, when GraphManager %s",
GetContext().DeviceId(), graph_id, __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id);
ret = FAILED;
}
}

RemoveCompilerStages(graph_id);
RemoveGraphCount(graph_id);
RemoveAddGraphCondition(graph_id);

GE_CHK_STATUS_RET(ret, "[GraphManager:] Remove graph failed, graph_id=%u.", graph_id);
GELOGI("[GraphManager] remove graph success, graph_id=%u.", graph_id);
@@ -2409,6 +2516,7 @@ void GraphManager::ChangeConstTypeWhenTraining(const ComputeGraphPtr &compute_gr
Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const GraphNodePtr &graph_node) {
GELOGI("[LoadGraphAsync] run_graph_flag[%d], graph_id[%u]", options_.run_graph_flag, graph_node->GetGraphId());
if (options_.run_graph_flag && ge_root_model != nullptr) {
ge_root_model->SetTrainFlag(GetTrainFlag());
// synchronization run graph with model
ModelIdInfo model_id_info;
bool is_unknown_shape = false;
@@ -2425,9 +2533,9 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
}
}
GE_TIMESTAMP_START(LoadGraph);
GE_CHECK_NOTNULL(graph_node->graph_run_async_listener_);
Status ret =
GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, graph_node->graph_run_async_listener_);
auto listener = MakeShared<RunAsyncListener>();
GE_CHECK_NOTNULL(listener);
Status ret = GraphLoader::LoadModelOnline(model_id_info.model_id, ge_root_model, listener);
GE_TIMESTAMP_EVENT_END(LoadGraph, "GraphManager::LoadGraphAsync");
if (ret != SUCCESS) {
GELOGE(ret, "[LoadGraphAsync] LoadGraphAsync Failed");
@@ -2441,6 +2549,61 @@ Status GraphManager::LoadGraphAsync(const GeRootModelPtr &ge_root_model, const G
return SUCCESS;
}

void GraphManager::ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node,
const std::vector<uint32_t> &model_ids, uint32_t graph_id, uint64_t session_id) {
rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtSetDevice failed, device_id:%u, when GraphManager %s",
GetContext().DeviceId(), __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, graphId=%u.", graph_id);
return;
}
for (auto model_id : model_ids) {
uint64_t max_memory_size = 0;
Status result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size);
if (result != SUCCESS) {
continue;
}
GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id,
max_memory_size);
if (model_ids.size() > 1) {
result = ge_model->GetSessionId(model_id, session_id);
if (result != SUCCESS) {
GELOGW("[GraphManager:] get session failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
graph_id);
continue;
}
}
result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
if (result != SUCCESS) {
GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
graph_id);
}
result = GraphLoader::UnloadModel(model_id);
if (result != SUCCESS) {
GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
}
GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success.", graph_id, model_id);
}
graph_node->SetLoadFlag(false);
// Allow the model to be loaded again without adding the graph again
graph_node->SetLoadCount(graph_node->GetLoadRecord());
graph_node->SetLoadRecord(kNeverLoaded);
GeRootModelPtr ge_root_model = graph_node->GetGeRootModel();
if (ge_root_model == nullptr) {
GELOGW("ge_root_model is null, graph_id:%u", graph_id);
return;
}
ge_root_model->ClearAllModelId();
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
REPORT_CALL_ERROR("E19999", "Call rtDeviceReset failed, device_id:%u, when GraphManager %s",
GetContext().DeviceId(), __FUNCTION__);
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, graphId=%u.", graph_id);
return;
}
}
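
ReleaseMemory brackets the per-model teardown between rtSetDevice and rtDeviceReset and has to remember the reset on every early return. One hedged alternative, sketched below, is an RAII guard that runs the reset automatically; this is an illustration of the pattern, not the runtime API:

#include <functional>
#include <utility>

class DeviceResetGuard {
 public:
  explicit DeviceResetGuard(std::function<void()> reset) : reset_(std::move(reset)) {}
  ~DeviceResetGuard() {
    if (reset_) {
      reset_();  // stand-in for rtDeviceReset(device_id), runs on every exit path
    }
  }

 private:
  std::function<void()> reset_;
};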

Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const GraphNodePtr &graph_node) {
GELOGI("CheckAndReleaseMemory graph_id[%u]", graph_node->GetGraphId());
int64_t value = 0;
@@ -2484,6 +2647,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
continue;
}
auto model_id = model->GetModelId();
auto model_ids = model->GetAllModelId();
// unload model not release
bool is_unknown_shape = false;
GE_CHK_STATUS_RET(model->CheckIsUnknownShape(is_unknown_shape));
@@ -2496,34 +2660,7 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra
GELOGI("CheckAndReleaseMemory graph[%u] has not been loaded.", graph_id);
continue;
}
uint64_t max_memory_size = 0;
result = GraphLoader::GetMaxUsedMemory(model_id, max_memory_size);
if (result != SUCCESS) {
continue;
}
GELOGI("CheckAndReleaseMemory try to UnloadGraph[%u], model[%u] which MaxUsedMemory[%lu].", graph_id, model_id,
max_memory_size);
rtError_t rt_ret = rtSetDevice(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] rtSetDevice failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
result = GraphLoader::DestroyAicpuKernel(session_id, model_id, 0);
if (result != SUCCESS) {
GELOGW("[GraphManager:] destroy aicpu kernel failed when dynamic memory, modelId=%u, graphId=%u.", model_id,
graph_id);
}
result = GraphLoader::UnloadModel(model_id);
if (result != SUCCESS) {
GELOGW("[GraphManager:] unload model failed, modelId=%u, graphId=%u.", model_id, graph_id);
}
rt_ret = rtDeviceReset(GetContext().DeviceId());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "[GraphManager:] rtDeviceReset failed, modelId=%u, graphId=%u.", model_id, graph_id);
continue;
}
it.second->SetLoadFlag(false);
GELOGI("CheckAndReleaseMemory UnloadGraph[%u], model[%u] success and set LoadFlag to false.", graph_id, model_id);
ReleaseMemory(ge_model, it.second, model_ids, graph_id, session_id);
}

return SUCCESS;
@@ -2659,6 +2796,38 @@ void GraphManager::ConstructGeInput(const vector<InputTensorInfo> &inputs, vecto
}
}

Status GraphManager::CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args,
GraphNodePtr &graph_node, GeRootModelPtr &ge_root_model) {
if (!graph_manager->IsGraphNeedBuild(graph_node)) {
ge_root_model = graph_node->GetGeRootModel();
return SUCCESS;
}
if (graph_node->GetBuildFlag()) {
ReturnError(graph_manager, args.callback, PARAM_INVALID,
"The graph " + std::to_string(graph_node->GetGraphId()) +
" need to re-build, you should remove it"
" from GE first, then AddGraph again and rebuild it.");
graph_node->Unlock();
return PARAM_INVALID;
}
// check need incre build.
GeModelPtr ge_model = nullptr;
if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) {
std::vector<GeTensor> ge_inputs;
ConstructGeInput(args.input_tensor, ge_inputs);
Status ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id);
// release rts generate context
RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId());
if (ret != SUCCESS) {
ReturnError(graph_manager, args.callback, ret, "PreRun Failed.");
return ret;
}
}
graph_node->SetBuildFlag(true);
graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
return SUCCESS;
}

void GraphManager::PreRunThread(GraphManager *graph_manager) {
if (prctl(PR_SET_NAME, ("GE_PreRun")) != 0) {
GELOGW("Set thread name failed.");
@@ -2671,7 +2840,7 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
continue;
}

GELOGI("A new loop start.");
GELOGI("[PreRunThread] A new loop start, graph_id:%u.", args.graph_id);

ErrorManager::GetInstance().SetErrorContext(args.error_context);
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
@@ -2687,7 +2856,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
"[RunGraph] graph not exist, graph_id=" + std::to_string(args.graph_id));
return;
}

// more than one graph may own the same graph_id
uint32_t count = 0;
if (graph_manager->GetGraphCount(args.graph_id, count) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "Get graph [id:%u] count failed.", args.graph_id);
return;
}
// Avoid repeated prerun for graphs with the same graph_id in online inference concurrency
if (count > 1 && graph_node->GetBuildFlag()) {
graph_node->Lock();
GELOGD("Avoid repeatively prerun, graph_id:%u.", args.graph_id);
// In online inference concurrency scenarios, graph_node is allowed to be locked 'count' times
graph_node->SetSemSize(count);
graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context,
args.input_tensor, graph_node->GetGeRootModel(), GetThreadLocalContext(), args.callback }));
GELOGI("[PreRunThread] Loop end. Start to run with cached build model.");
continue;
}
// Cannot be moved ahead of the repeated-prerun judgement
graph_node->Lock();

if (graph_node->GetRunFlag()) {
@@ -2719,46 +2905,24 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
// it will not execute graph preprocessing, optimization, partition, or build if the graph has been built successfully.
GELOGI("Start for run graph async.");
GeRootModelPtr ge_root_model = nullptr;
if (graph_manager->IsGraphNeedBuild(graph_node)) {
if (graph_node->GetBuildFlag()) {
ReturnError(graph_manager, args.callback, PARAM_INVALID,
"The graph " + std::to_string(graph_node->GetGraphId()) +
" need to re-build, you should remove it"
" from GE first, then AddGraph again and rebuild it.");
ret = CheckIncreBuildAndPreRun(graph_manager, args, graph_node, ge_root_model);
if (ret != SUCCESS) {
graph_node->SetRunFlag(false);
if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) {
ReturnError(graph_manager, args.callback, ret, "CheckIncreBuildAndPreRun Failed, thread exit..");
graph_node->Unlock();
return;
} else {
ReturnError(graph_manager, graph_node, args.callback, ret,
"CheckIncreBuildAndPreRun Failed, keep geop continue!");
graph_node->Unlock();
continue;
}

// check need incre build.
GeModelPtr ge_model = nullptr;
if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) {
std::vector<GeTensor> ge_inputs;
ConstructGeInput(args.input_tensor, ge_inputs);
ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id);
// release rts generate context
RtContextUtil::GetInstance().DestroyRtContexts(args.session_id, graph_node->GetGraphId());
if (ret != SUCCESS) {
graph_node->SetRunFlag(false);
if (!ge::Analyzer::GetInstance()->IsEnableNetAnalyzeDebug()) {
ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit..");
graph_node->Unlock();
return;
} else {
ReturnError(graph_manager, graph_node, args.callback, ret, "PreRun Failed, keep geop continue!");
graph_node->Unlock();
continue;
}
}
}
graph_node->SetBuildFlag(true);
graph_manager->var_acc_ctrl_.SetGraphBuildEnd(graph_node->GetGraphId());
} else {
ge_root_model = graph_node->GetGeRootModel();
}

graph_manager->run_args_q_.Push(RunArgs( { graph_node, args.graph_id, args.session_id, args.error_context,
args.input_tensor, ge_root_model, GetThreadLocalContext(), args.callback }));
GELOGI("Loop end.");
GELOGI("[PreRunThread] Loop end.");
}
}

@@ -2855,16 +3019,13 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
continue;
}

GELOGI("A new loop start.");
GELOGI("[RunThread] A new loop start, graph_id:%u.", args.graph_id);

ErrorManager::GetInstance().SetErrorContext(args.error_context);
GetContext().SetSessionId(args.session_id);
GetThreadLocalContext() = args.context;
graph_manager->UpdateLocalOmgContext(args.graph_id);

if (args.graph_node->graph_run_async_listener_ != nullptr) {
args.graph_node->graph_run_async_listener_->SetCallback(args.callback);
}
Status ret;
// parse inputs.dims to vector<vector<uint64_t>> dynamic_dims
ret = graph_manager->ParseInputsDims(args.input_tensor);
@@ -2874,8 +3035,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
return;
}

args.graph_node->UpdateLoadFlag();
if (!args.graph_node->GetLoadFlag()) {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelLoad, ErrorMessage::kModelLoad);
args.ge_root_model->SetTrainFlag(graph_manager->GetTrainFlag());
ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node);
if (ret != SUCCESS || args.ge_root_model == nullptr) {
StopQueue(graph_manager);
@@ -2883,6 +3046,10 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
args.graph_node->Unlock();
return;
}
// control the times of graph loading in multi-thread scenario
args.graph_node->DecreaseLoadCount();
args.graph_node->IncreaseLoadRecord();

args.graph_node->SetLoadFlag(true);
GELOGI("LoadGraph[%u], model[%u] success and set LoadFlag to true.", args.graph_node->GetGraphId(),
args.ge_root_model->GetModelId());
@@ -2898,7 +3065,7 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
}

ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(),
args.input_tensor);
args.input_tensor, args.callback);
args.graph_node->SetRunFlag(false);
if (ret != SUCCESS) {
ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit.");
@@ -3314,4 +3481,49 @@ void GraphManager::RemoveCompilerStages(GraphId graph_id) {
std::lock_guard<std::mutex> lock(member_mutex_);
compiler_stages_.erase(graph_id);
}

void GraphManager::IncreaseGraphCount(GraphId graph_id) {
std::lock_guard<std::mutex> lock(graph_count_mutex_);
auto it = graph_count_.find(graph_id);
if (it == graph_count_.end()) {
graph_count_.insert({graph_id, kInitGraphCount});
GELOGD("After increaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
} else {
++graph_count_[graph_id];
GELOGD("After increaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
}
}

void GraphManager::RemoveGraphCount(GraphId graph_id) {
std::lock_guard<std::mutex> lock(graph_count_mutex_);
auto it = graph_count_.find(graph_id);
if (it == graph_count_.end()) {
GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id);
} else {
GELOGD("RemoveGraphCount success, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
graph_count_.erase(it);
}
}

void GraphManager::DecreaseGraphCount(GraphId graph_id) {
std::lock_guard<std::mutex> lock(graph_count_mutex_);
auto it = graph_count_.find(graph_id);
if (it == graph_count_.end()) {
GELOGW("Graph of id: %u has not been added, count cannot be decreased.", graph_id);
} else {
--it->second;
GELOGD("After DecreaseGraphCount, graph count of id[%u] is %u.", graph_id, graph_count_[graph_id]);
}
}

Status GraphManager::GetGraphCount(GraphId graph_id, uint32_t &count) {
std::lock_guard<std::mutex> lock(graph_count_mutex_);
auto it = graph_count_.find(graph_id);
if (it == graph_count_.end()) {
GELOGW("Graph [id:%u] has not been added.", graph_id);
return FAILED;
}
count = it->second;
return SUCCESS;
}
} // namespace ge
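The four helpers above are a mutex-guarded reference count per graph id: AddGraph on an existing id bumps the count, DecreaseGraphCount drops it, and the entry disappears only when the graph is really removed. A simplified, self-contained sketch of the same pattern (types reduced; kInitGraphCount assumed to be 1):

#include <cstdint>
#include <map>
#include <mutex>

class GraphCounter {
 public:
  void Increase(uint32_t graph_id) {
    std::lock_guard<std::mutex> lock(mu_);
    ++counts_[graph_id];  // first access value-initializes to 0, so a new id becomes 1
  }
  void Decrease(uint32_t graph_id) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = counts_.find(graph_id);
    if (it != counts_.end() && it->second > 0) {
      --it->second;
    }
  }
  bool Get(uint32_t graph_id, uint32_t &count) {
    std::lock_guard<std::mutex> lock(mu_);
    auto it = counts_.find(graph_id);
    if (it == counts_.end()) {
      return false;  // mirrors GetGraphCount returning FAILED
    }
    count = it->second;
    return true;
  }

 private:
  std::map<uint32_t, uint32_t> counts_;
  std::mutex mu_;
};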

+ 42
- 0
ge/graph/manager/graph_manager.h View File

@@ -184,6 +184,20 @@ class GraphManager {

Status SaveCheckPointResult(const Graph &graph, const std::vector<Tensor> &outputs, map<string, Tensor> &var_results);

void RemoveGraphCount(GraphId graph_id);

void IncreaseGraphCount(GraphId graph_id);

void DecreaseGraphCount(GraphId graph_id);

Status GetGraphCount(GraphId graph_id, uint32_t &count);

void SetAddGraphCondition(GraphId graph_id, uint32_t cond);

uint32_t GetAddGraphCondition(GraphId graph_id);

void RemoveAddGraphCondition(GraphId graph_id);

private:
struct CompilerStages {
GraphPrepare preparer;
@@ -380,6 +394,24 @@ class GraphManager {
CompilerStages &GetCompilerStages(GraphId graph_id);
void RemoveCompilerStages(GraphId graph_id);

static Status CheckIncreBuildAndPreRun(GraphManager *graph_manager, const PreRunArgs &args, GraphNodePtr &graph_node,
GeRootModelPtr &ge_root_model);

void ReleaseMemory(const GeModelPtr &ge_model, GraphNodePtr &graph_node, const std::vector<uint32_t> &model_ids,
uint32_t graph_id, uint64_t session_id);

Status CheckRepeatAdd(uint32_t graph_id, bool &is_added);

Status NotifyWaittingGraph(uint32_t graph_id);

Status CreateGraphNode(uint32_t graph_id, const Graph &graph, const std::map<std::string, std::string> &options);

Status SetStagesOptions(uint32_t graph_id, const GraphManagerOptions &options);

Status UnloadModel(GeRootModelPtr ge_root_model, uint32_t graph_id);

void SetSessionGraphId(ComputeGraphPtr compute_graph, uint32_t graph_id);

std::atomic_bool thread_run_flag_;
BlockingQueue<PreRunArgs> prerun_args_q_{};
BlockingQueue<RunArgs> run_args_q_{};
@@ -415,6 +447,16 @@ class GraphManager {

std::mutex member_mutex_;
std::mutex unload_model_mutex_;
// avoid repeatedly adding the same graph (i.e. one with the same graph id)
std::mutex add_graph_mutex_;
std::mutex add_graph_cond_mutex_;
std::condition_variable add_graph_cv_;

std::map<GraphId, uint32_t> graph_id_to_add_graph_cond_;
// used for the multi-thread online-infer scenario
std::set<GraphId> to_be_deleted_graphs_;
std::map<GraphId, uint32_t> graph_count_;
std::mutex graph_count_mutex_;
};
} // namespace ge



+ 9
- 0
ge/graph/manager/graph_manager_utils.cc View File

@@ -60,6 +60,15 @@ void GraphNode::Unlock() {
sem_.Pop(unused);
}

void GraphNode::IncreaseLoadCount() {
std::unique_lock<std::mutex> lock(load_count_mu_);
if (load_record_ == kMaxLoadNum) {
GELOGW("Reach the maximum of load_count:%u", kMaxLoadNum);
return;
}
++load_count_;
}

SubGraphInfo::SubGraphInfo() : subgraph_ptr_(nullptr), ge_model_ptr_(nullptr), malloc_flag_(false) {}

SubGraphInfo::~SubGraphInfo() {


+ 19
- 0
ge/graph/manager/graph_manager_utils.h View File

@@ -55,6 +55,7 @@ using ConstGraphPtr = std::shared_ptr<const ge::Graph>;
using GraphPtr = std::shared_ptr<ge::Graph>;

const uint64_t INVALID_SESSION_ID = 0xffffffffffffffffULL;
const uint32_t kMaxLoadNum = 8;

struct ModelIdInfo {
uint32_t model_id{INVALID_MODEL_ID};
@@ -162,6 +163,8 @@ class GraphNode {
bool GetBuildFlag() const { return build_flag_; }
void SetBuildFlag(bool buildFlag) { build_flag_ = buildFlag; }
bool GetLoadFlag() const { return load_flag_; }
// allow repeatedly loading a graph with the same graph id
void UpdateLoadFlag() { load_flag_ = load_count_ == 0 || load_record_ >= kMaxLoadNum; }
void SetLoadFlag(bool load_flag) { load_flag_ = load_flag; }
void SetGeModel(const GeModelPtr &ge_model) { ge_model_ = ge_model; }
GeModelPtr GetGeModel() const { return ge_model_; }
@@ -172,6 +175,16 @@ class GraphNode {
void Lock();
void Unlock();

void SetSemSize(uint32_t size) { sem_.SetMaxSize(size); }

uint32_t GetLoadCount() const { return load_count_; }
void SetLoadCount(uint32_t count) { load_count_ = count; }
uint32_t GetLoadRecord() const { return load_record_; }
void SetLoadRecord(uint32_t record) { load_record_ = record; }
void IncreaseLoadRecord() { ++load_record_; }
void IncreaseLoadCount();
void DecreaseLoadCount() { --load_count_; }

// run graph asynchronous listener
std::shared_ptr<RunAsyncListener> graph_run_async_listener_;

@@ -184,11 +197,17 @@ class GraphNode {
GraphPtr graph_;
ComputeGraphPtr compute_graph_;
bool build_flag_;
// load_flag_ is true if more than one model has been loaded
bool load_flag_;
bool async_;
GeModelPtr ge_model_;
GeRootModelPtr ge_root_model_;
BlockingQueue<uint8_t> sem_;
// consistent with graph_count_ of the same graph_id in GraphManager
uint32_t load_count_ = 0;
// total number of times a graph with the same graph_id has been loaded.
uint32_t load_record_ = 0;
std::mutex load_count_mu_;
};

using GraphNodePtr = std::shared_ptr<GraphNode>;
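Read together, load_count_, load_record_ and kMaxLoadNum form the multi-thread load throttle used by RunThread above: load_count_ tracks pending load requests for the graph id, load_record_ counts completed loads, and UpdateLoadFlag() treats the graph as loaded once nothing is pending or the record hits the cap. A condensed sketch under those assumptions:

#include <cstdint>
#include <mutex>

constexpr uint32_t kMaxLoadNum = 8;  // cap on repeated loads of one graph id

class LoadBook {
 public:
  void RequestLoad() {  // mirrors IncreaseLoadCount()
    std::lock_guard<std::mutex> lock(mu_);
    if (load_record_ == kMaxLoadNum) {
      return;  // refuse new requests past the cap
    }
    ++load_count_;
  }
  void OnLoaded() {  // mirrors DecreaseLoadCount() + IncreaseLoadRecord()
    std::lock_guard<std::mutex> lock(mu_);
    --load_count_;
    ++load_record_;
  }
  bool NeedLoad() {  // inverse of UpdateLoadFlag()/GetLoadFlag()
    std::lock_guard<std::mutex> lock(mu_);
    return !(load_count_ == 0 || load_record_ >= kMaxLoadNum);
  }

 private:
  uint32_t load_count_ = 0;   // pending load requests
  uint32_t load_record_ = 0;  // total completed loads
  std::mutex mu_;
};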


+ 40
- 2
ge/graph/optimize/graph_optimize.cc View File

@@ -144,7 +144,7 @@ Status GraphOptimize::OptimizeOriginalGraph(ComputeGraphPtr &compute_graph) {
}

auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
GELOGI("optimize by opskernel in original graph optimize phase. num of graph_optimizer is %lu.",
GELOGI("optimize by opskernel in original graph optimize phase. num of graph_optimizer is %zu.",
graph_optimizer.size());
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
GELOGD("[OptimizeOriginalGraph]: engine type will exclude: %s", exclude_core_Type.c_str());
@@ -179,7 +179,7 @@ Status GraphOptimize::OptimizeOriginalGraphJudgeInsert(ComputeGraphPtr &compute_
}

auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
GELOGI("optimize by opskernel in original graph optimize phase. num of graph_optimizer is %lu.",
GELOGI("optimize by opskernel in judging insert phase. num of graph_optimizer is %zu.",
graph_optimizer.size());
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
if (graph_optimizer.size() != 0) {
@@ -266,6 +266,44 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr
return ret;
}

Status GraphOptimize::OptimizeAfterStage1(ComputeGraphPtr &compute_graph) {
GE_CHECK_NOTNULL(compute_graph);
GELOGD("OptimizeAfterStage1 in");
if (GetContext().GetHostExecFlag()) {
// the graph executes on host, so OptimizeAfterStage1 is not needed
return SUCCESS;
}

Status ret = SUCCESS;
std::shared_ptr<GELib> instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
REPORT_INNER_ERROR("E19999", "Gelib not init before, check invalid");
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "OptimizeAfterStage1 failed.");
return GE_CLI_GE_NOT_INITIALIZED;
}

auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
GELOGI("Optimize by ops kernel in after stage1 phase, num of graph_optimizer is %zu.", graph_optimizer.size());
string exclude_core_type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
if (graph_optimizer.size() != 0) {
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) {
if (iter->first == exclude_core_type) {
GELOGI("[OptimizeAfterStage1]: engine type will exclude:%s.", exclude_core_type.c_str());
continue;
}
GELOGI("Begin to optimize graph after stage1 by engine %s.", iter->first.c_str());
ret = (iter->second)->OptimizeAfterStage1(*compute_graph);
if (ret != SUCCESS) {
REPORT_INNER_ERROR("E19999", "Call OptimizeAfterStage1 failed, ret:%d, engine_name:%s, "
"graph_name:%s.", ret, iter->first.c_str(), compute_graph->GetName().c_str());
GELOGE(ret, "[OptimizeAfterStage1]: graph optimize failed, ret:%d.", ret);
return ret;
}
}
}
return ret;
}
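OptimizeAfterStage1 uses the same dispatch shape as the other optimize phases: walk the priority-ordered optimizer list, skip the excluded core type, stop at the first failure. A self-contained illustration (optimizers reduced to std::function; not the GE types):

#include <cstdio>
#include <functional>
#include <string>
#include <utility>
#include <vector>

using Optimizer = std::pair<std::string, std::function<int()>>;  // engine name -> hook

int RunAfterStage1(const std::vector<Optimizer> &by_priority, const std::string &exclude) {
  for (const auto &opt : by_priority) {  // assumed already sorted by priority
    if (opt.first == exclude) {
      continue;  // skip the excluded core type
    }
    int ret = opt.second();
    if (ret != 0) {
      return ret;  // first failure aborts the phase
    }
  }
  return 0;
}

int main() {
  std::vector<Optimizer> opts = {
      {"AIcoreEngine", [] { std::puts("aicore after-stage1"); return 0; }},
      {"VectorEngine", [] { std::puts("vector after-stage1"); return 0; }}};
  return RunAfterStage1(opts, "VectorEngine");  // runs only the aicore hook
}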

Status GraphOptimize::SetOptions(const ge::GraphManagerOptions &options) {
if (options.framework_type >= static_cast<int32_t>(domi::FrameworkType::FRAMEWORK_RESERVED)) {
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Optimize Type %d invalid.", options.framework_type);


+ 3
- 0
ge/graph/optimize/graph_optimize.h View File

@@ -58,6 +58,9 @@ class GraphOptimize {
// for rts optimize before build to add attr and insert memcpy op
Status OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_graph);

// optimize whole graph, using after stage1
Status OptimizeAfterStage1(ComputeGraphPtr &graph);

// set options
Status SetOptions(const GraphManagerOptions &options);



+ 39
- 6
ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -126,11 +126,11 @@ bool AtomicAddrCleanPass::IsOutputIndexPeerInputAtomic(const NodePtr &node, int6

bool AtomicAddrCleanPass::CheckSkipInsertInLoopGraph(const NodePtr &node) {
OpDescPtr op_desc = node->GetOpDesc();
std::map<string, std::map<int, int>> node_workspace_offset;
std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
if (!has_atomic_input && has_atomic_output && node_workspace_offset.empty()) {
atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
if (!has_atomic_input && has_atomic_output && atomic_workspace_index_size.empty()) {
std::vector<int64_t> atomic_output_index;
(void) ge::AttrUtils::GetListInt(op_desc, ATOMIC_ATTR_OUTPUT_INDEX, atomic_output_index);
bool is_all_output_peer_also_atomic = true;
@@ -222,6 +222,39 @@ Status AtomicAddrCleanPass::HandleNormalGraph(ComputeGraphPtr &graph, const vect
}
}
}
return LinkToPotentialPrecedenceNode(graph, clean_addr_node);
}

// Add control edges from the atomic clean node to all potential precedence nodes which may execute before it.
// We want the atomic clean node to execute with the highest priority in the entire graph. Because of the stream
// concurrency mechanism, merely placing it at the head of the graph cannot guarantee that priority. Therefore, we add
// control edges from the atomic clean node to the nodes that may be the first node on each stream. Generally, the
// first nodes on each stream are successors of Data/Variable, and Data/Variable itself won't generate a task or
// execute, so we link to the successors of Data/Variable.
Status AtomicAddrCleanPass::LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node) {
GELOGD("Start to add control edges from %s to all second-nodes behind first-nodes which have no input.",
atomic_clean_node->GetName().c_str());
auto out_ctrl_anchor = atomic_clean_node->GetOutControlAnchor();
GE_CHECK_NOTNULL(out_ctrl_anchor);

for (const auto &node : graph->GetDirectNode()) {
GE_CHECK_NOTNULL(node);
bool need_handle = (node->GetType() == DATA || node->GetType() == VARIABLE) && node->GetInAllNodes().empty();
if (!need_handle) {
continue;
}
auto second_nodes = node->GetOutAllNodes();
for (const auto &second_node : second_nodes) {
GE_CHECK_NOTNULL(second_node);
auto in_ctrl_anchor = second_node->GetInControlAnchor();
GE_CHECK_NOTNULL(in_ctrl_anchor);
if (!out_ctrl_anchor->IsLinkedWith(in_ctrl_anchor)) {
GE_CHK_STATUS_RET(out_ctrl_anchor->LinkTo(in_ctrl_anchor));
GELOGD("Add control edge from %s to %s.", atomic_clean_node->GetName().c_str(), second_node->GetName().c_str());
}
}
}

return SUCCESS;
}
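For illustration, the selection rule above reduces to: every direct successor of an input-less Data/Variable node is a candidate first node on some stream, and each one gets a control edge from the clean node. A toy version with plain structs (not the GE graph API):

#include <set>
#include <string>
#include <vector>

struct Node {
  std::string type;  // e.g. "Data", "Variable", "MatMul"
  std::vector<Node *> inputs, outputs;
};

// Returns the nodes that should receive a control edge from the atomic clean node.
std::set<Node *> PotentialPrecedenceNodes(const std::vector<Node *> &graph) {
  std::set<Node *> targets;  // the set dedupes, like the IsLinkedWith check above
  for (Node *node : graph) {
    bool is_head = (node->type == "Data" || node->type == "Variable") && node->inputs.empty();
    if (!is_head) {
      continue;
    }
    for (Node *second : node->outputs) {
      targets.insert(second);
    }
  }
  return targets;
}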

@@ -332,11 +365,11 @@ bool AtomicAddrCleanPass::IsAtomicOp(const NodePtr &node) {
}

// 2.Check atomic attr in node
std::map<string, std::map<int, int>> node_workspace_offset;
std::map<string, std::map<int64_t, int64_t>> atomic_workspace_index_size;
bool has_atomic_input = op_desc->HasAttr(ATOMIC_ATTR_INPUT_INDEX);
bool has_atomic_output = op_desc->HasAttr(ATOMIC_ATTR_OUTPUT_INDEX);
node_workspace_offset = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_OFFSET, node_workspace_offset);
if (!has_atomic_input && !has_atomic_output && node_workspace_offset.empty()) {
atomic_workspace_index_size = op_desc->TryGetExtAttr(EXT_ATTR_ATOMIC_WORKSPACE_INFO, atomic_workspace_index_size);
if (!has_atomic_input && !has_atomic_output && atomic_workspace_index_size.empty()) {
return false;
}



+ 8
- 0
ge/graph/passes/atomic_addr_clean_pass.h View File

@@ -68,6 +68,14 @@ class AtomicAddrCleanPass : public GraphPass {
Status LinkToAtomicNode(const NodePtr &atomic_node, NodePtr &atomic_clean_node);

/**
* Link atomic clean node to all potential precedence nodes which may execute before atomic clean node
* @param graph
* @param atomic_clean_node
* @return
*/
Status LinkToPotentialPrecedenceNode(ComputeGraphPtr &graph, NodePtr &atomic_clean_node);

/**
* Check if this node is atomic op.
* @param node
* @return


+ 0
- 1
ge/graph/passes/attach_stream_label_pass.cc View File

@@ -137,7 +137,6 @@ Status AttachStreamLabelPass::AttachFlag(const NodePtr &node, std::string &strea
return INTERNAL_ERROR;
}
stream_label = node->GetInDataNodes().at(0)->GetName();
GE_CHK_STATUS_RET(SetStreamLabel(node, stream_label), "Set stream label failed.");
bool value = false;
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);


+ 61
- 42
ge/graph/passes/base_pass.cc View File

@@ -30,8 +30,15 @@ constexpr int kMaxRePassTimes = 10000;
constexpr size_t kMaxOneInNodes = 1000;
// Each iteration, we take about 0.3k memory on the stack, we should change the recursion to loop later
constexpr int kMaxRecursiveDepth = 20;
struct DuringPassNodeSets {
std::unordered_set<Node *> nodes_seen;
std::unordered_set<NodePtr> nodes_deleted;
std::unordered_set<NodePtr> nodes_re_pass;
std::unordered_set<NodePtr> nodes_re_pass_immediately;
std::unordered_set<NodePtr> nodes_last;
};

void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &input_edge_nodes,
void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::deque<NodePtr> &input_edge_nodes,
std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
nodes_last.clear();
for (auto &node : graph->GetDirectNode()) {
@@ -40,7 +47,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
}
size_t in_nums = node->GetInNodes().size();
if (in_nums == 0) {
input_edge_nodes.push(node);
input_edge_nodes.push_back(node);
nodes_seen.insert(node.get());
} else if (in_nums > kMaxOneInNodes) {
nodes_last.insert(node);
@@ -48,7 +55,7 @@ void GetAllNodesNoInputEdge(const ComputeGraphPtr &graph, std::queue<NodePtr> &i
}
}

void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &nodes_to_pass,
void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::deque<NodePtr> &nodes_to_pass,
std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_last) {
for (auto &node : nodes) {
if (node == nullptr) {
@@ -60,13 +67,30 @@ void AddNextIterNodes(const Node::Vistor<NodePtr> &nodes, std::queue<NodePtr> &n

bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
nodes_to_pass.push(node);
nodes_to_pass.push_back(node);
}
}
}

Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unordered_set<NodePtr> &nodes_re_pass,
std::unordered_set<NodePtr> &nodes_deleted, std::unordered_set<Node *> &nodes_seen) {
void PushToRePassIfSeen(NodePtr &node, const std::pair<std::string, BaseNodePass *> &name_to_pass,
std::unordered_set<Node *> &nodes_seen, std::unordered_set<NodePtr> &nodes_to_re_pass,
std::unordered_set<NodePtr> &nodes_re_pass) {
for (const auto &node_to_re_pass : nodes_to_re_pass) {
if (node_to_re_pass == nullptr) {
GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
node->GetName().c_str(), node->GetType().c_str());
continue;
}
if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
GELOGD("The node %s will be re-pass.", node_to_re_pass->GetName().c_str());
nodes_re_pass.insert(node_to_re_pass);
} else {
GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
}
}
}

Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, DuringPassNodeSets &during_pass_node_set) {
if (node == nullptr) {
GELOGE(FAILED, "parameter is null.");
return FAILED;
@@ -90,22 +114,15 @@ Status RunPasses(NodePtr &node, const NamesToPass &names_to_passes, std::unorder
}

auto nodes_to_re_pass = name_to_pass.second->GetNodesNeedRePass();
for (const auto &node_to_re_pass : nodes_to_re_pass) {
if (node_to_re_pass == nullptr) {
GELOGW("Found null re-pass node when executing %s on node %s type %s", name_to_pass.first.c_str(),
node->GetName().c_str(), node->GetType().c_str());
continue;
}
if (nodes_seen.count(node_to_re_pass.get()) > 0 || node_to_re_pass->IsAllInNodesSeen(nodes_seen)) {
GELOGD("The node %s will be re-pass later", node_to_re_pass->GetName().c_str());
nodes_re_pass.insert(node_to_re_pass);
} else {
GELOGD("The node %s are not all seen, don't set repass this time", node_to_re_pass->GetName().c_str());
}
}
PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass,
during_pass_node_set.nodes_re_pass);

auto nodes_to_re_pass_immediately = name_to_pass.second->GetNodesNeedRePassImmediately();
PushToRePassIfSeen(node, name_to_pass, during_pass_node_set.nodes_seen, nodes_to_re_pass_immediately,
during_pass_node_set.nodes_re_pass_immediately);

auto nodes_deleted_by_pass = name_to_pass.second->GetNodesDeleted();
nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
during_pass_node_set.nodes_deleted.insert(nodes_deleted_by_pass.begin(), nodes_deleted_by_pass.end());
if (nodes_deleted_by_pass.count(node) > 0) {
GELOGD("The node %s was deleted by pass %s, stop the remain passes", node->GetName().c_str(),
name_to_pass.first.c_str());
@@ -181,36 +198,33 @@ Status GEPass::Run(const NamesToPass &names_to_passes) {

Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
GELOGD("Begin to run pass on graph, passes count %zu", names_to_passes.size());
std::queue<NodePtr> nodes;
std::unordered_set<Node *> nodes_seen;
std::unordered_set<NodePtr> nodes_deleted;
std::unordered_set<NodePtr> nodes_re_pass;
std::unordered_set<NodePtr> nodes_last;
GetAllNodesNoInputEdge(graph_, nodes, nodes_seen, nodes_last);
std::deque<NodePtr> nodes;
DuringPassNodeSets during_pass_node_set;
GetAllNodesNoInputEdge(graph_, nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);
GELOGD("Start points count %zu", nodes.size());
int re_pass_times = 0;

do {
for (auto &node : nodes_re_pass) {
nodes.push(node);
nodes_seen.insert(node.get());
for (auto &node : during_pass_node_set.nodes_re_pass) {
nodes.push_back(node);
during_pass_node_set.nodes_seen.insert(node.get());
}
nodes_re_pass.clear();
during_pass_node_set.nodes_re_pass.clear();

while (!nodes.empty()) {
NodePtr node = nodes.front();
nodes.pop();
nodes.pop_front();

(void)nodes_re_pass.erase(node);
(void)during_pass_node_set.nodes_re_pass.erase(node);
GE_IF_BOOL_EXEC(node == nullptr, GELOGW("node is null"); continue);
if (nodes_deleted.count(node) > 0) {
if (during_pass_node_set.nodes_deleted.count(node) > 0) {
GELOGD("The node %s was deleted before, skip it.", node->GetName().c_str());
continue;
}

AddNextIterNodes(node->GetOutNodes(), nodes, nodes_seen, nodes_last);
AddNextIterNodes(node->GetOutNodes(), nodes, during_pass_node_set.nodes_seen, during_pass_node_set.nodes_last);

auto ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
auto ret = RunPasses(node, names_to_passes, during_pass_node_set);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -227,7 +241,7 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
if (has_sub_graph) {
GELOGD("There are subgraphs on node %s, run passes for for the second time", node->GetName().c_str());
SetFlagOption(kOptimizeAfterSubGraph, names_to_passes);
ret = RunPasses(node, names_to_passes, nodes_re_pass, nodes_deleted, nodes_seen);
ret = RunPasses(node, names_to_passes, during_pass_node_set);
if (ret != SUCCESS) {
GELOGE(ret, "Failed to process passes on node %s type %s, error code: %u",
node->GetName().c_str(), node->GetType().c_str(), ret);
@@ -239,16 +253,21 @@ Status GEPass::RunPassesOneGraph(const NamesToPass &names_to_passes) {
// should be called at the beginning of each iteration
ClearOption(names_to_passes);
}
for (const auto &node : during_pass_node_set.nodes_re_pass_immediately) {
GELOGD("The node %s will be re-pass immediately.", node->GetName().c_str());
nodes.push_front(node);
}
during_pass_node_set.nodes_re_pass_immediately.clear();
}

for (auto &node : nodes_last) {
bool all_in_nodes_seen = node->IsAllInNodesSeen(nodes_seen);
if (all_in_nodes_seen && nodes_seen.insert(node.get()).second) {
nodes.push(node);
for (auto &node : during_pass_node_set.nodes_last) {
bool all_in_nodes_seen = node->IsAllInNodesSeen(during_pass_node_set.nodes_seen);
if (all_in_nodes_seen && during_pass_node_set.nodes_seen.insert(node.get()).second) {
nodes.push_back(node);
}
}
nodes_last.clear();
} while ((!nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);
during_pass_node_set.nodes_last.clear();
} while ((!during_pass_node_set.nodes_re_pass.empty() || !nodes.empty()) && ++re_pass_times < kMaxRePassTimes);

if (re_pass_times == kMaxRePassTimes) {
GELOGW("re_pass_times should not come to %d", kMaxRePassTimes);


+ 12
- 0
ge/graph/passes/base_pass.h View File

@@ -53,6 +53,8 @@ class BaseNodePass {

std::unordered_set<NodePtr> GetNodesNeedRePass() { return nodes_need_re_pass_; }

std::unordered_set<NodePtr> GetNodesNeedRePassImmediately() { return nodes_need_re_pass_immediately_; }

std::unordered_set<NodePtr> GetNodesDeleted() { return nodes_deleted_; }

void SetOption(NodePassOption option, const std::string &value) { options_[option] = value; }
@@ -62,6 +64,7 @@ class BaseNodePass {
void init() {
nodes_need_re_pass_.clear();
nodes_deleted_.clear();
nodes_need_re_pass_immediately_.clear();
}

protected:
@@ -80,6 +83,14 @@ class BaseNodePass {
void AddRePassNode(NodePtr &node) { nodes_need_re_pass_.insert(node); }

///
/// Add a node to be optimized again immediately. If you add a new node to the graph, or
/// change a node's connections, and you want to make sure the node will be
/// optimized by other passes, call this function.
/// @param node
///
void AddImmediateRePassNode(NodePtr &node) { nodes_need_re_pass_immediately_.insert(node); }

///
/// Add a node and its input/output data nodes to be optimized again.
/// @param node
///
@@ -109,6 +120,7 @@ class BaseNodePass {

private:
std::unordered_set<NodePtr> nodes_need_re_pass_;
std::unordered_set<NodePtr> nodes_need_re_pass_immediately_;
std::unordered_set<NodePtr> nodes_deleted_;
std::map<NodePassOption, std::string> options_;
};


+ 15
- 0
ge/graph/passes/infershape_pass.cc View File

@@ -25,6 +25,7 @@

namespace ge {
Status InferShapePass::Run(NodePtr &node) {
// the presence of kOptimizeAfterSubGraph means we are in the after-subgraph phase
auto ret = ShapeRefiner::InferShapeAndType(node, !OptionExists(kOptimizeAfterSubGraph));
if (ret != GRAPH_SUCCESS) {
// select INFERSHAPE failed info
@@ -41,6 +42,20 @@ Status InferShapePass::Run(NodePtr &node) {
GELOGE(GE_GRAPH_INFERSHAPE_FAILED, "infershape failed. node: %s", node->GetName().c_str());
return GE_GRAPH_INFERSHAPE_FAILED;
}
bool need_repass = false;
auto has_attr = AttrUtils::GetBool(node->GetOpDesc(), "need_infer_again_", need_repass);
if (has_attr) {
if (!OptionExists(kOptimizeAfterSubGraph)) {
return SUCCESS;
}
if (need_repass) {
AddImmediateRePassNode(node);
GELOGD("Node %s need repass immediately.", node->GetName().c_str());
} else {
// clear attr on while
node->GetOpDesc()->DelAttr("need_infer_again_");
}
}
return SUCCESS;
}
} // namespace ge
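The added block is a small decision table driven by the need_infer_again_ attribute, which (as read from the diff) is only honored once the after-subgraph option is set. Condensed into a testable function:

#include <cstdio>

enum class Action { kDone, kRePassNow, kClearAttr };

// has_attr: node carries "need_infer_again_"; after_subgraph: kOptimizeAfterSubGraph is set.
Action AfterInferShape(bool has_attr, bool after_subgraph, bool need_repass) {
  if (!has_attr || !after_subgraph) {
    return Action::kDone;  // defer handling until the after-subgraph phase
  }
  return need_repass ? Action::kRePassNow   // AddImmediateRePassNode(node)
                     : Action::kClearAttr;  // DelAttr("need_infer_again_")
}

int main() {
  std::printf("%d\n", static_cast<int>(AfterInferShape(true, true, true)));  // 1: re-pass now
}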

+ 3
- 2
ge/graph/passes/merge_input_memcpy_pass.cc View File

@@ -23,7 +23,9 @@ namespace ge {
Status MergeInputMemcpyPass::Run(ComputeGraphPtr graph) {
GELOGD("MergeInputMemcpyPass Enter");
for (const auto &node : graph->GetDirectNode()) {
if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
std::string type;
GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
if ((type != MERGE) && (type != REFMERGE)) {
continue;
}
GE_CHECK_NOTNULL(node->GetOpDesc());
@@ -95,4 +97,3 @@ NodePtr MergeInputMemcpyPass::CreateMemcpyAsyncNode(const ComputeGraphPtr &graph
return graph->AddNode(op_desc);
}
} // namespace ge
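This change, and the matching ones in the next two passes, swaps node->GetType() for GetOriginalType(node, type) so that RefMerge/RefSwitch nodes are still recognized when they are wrapped in a framework op. A rough sketch of the idea (attribute key hypothetical, not the exact GE implementation):

#include <map>
#include <string>

struct FakeNode {
  std::string type;                          // e.g. "FrameworkOp"
  std::map<std::string, std::string> attrs;  // e.g. {"original_type", "RefMerge"}
};

// A framework op records the wrapped op's type in an attribute; matching on
// the node type alone would therefore miss it.
std::string GetOriginalType(const FakeNode &node) {
  if (node.type != "FrameworkOp") {
    return node.type;
  }
  auto it = node.attrs.find("original_type");  // hypothetical attribute key
  return it == node.attrs.end() ? node.type : it->second;
}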


+ 3
- 1
ge/graph/passes/merge_to_stream_merge_pass.cc View File

@@ -25,7 +25,9 @@ Status MergeToStreamMergePass::Run(ComputeGraphPtr graph) {

bypass_nodes_.clear();
for (const auto &node : graph->GetDirectNode()) {
if ((node->GetType() != MERGE) && (node->GetType() != REFMERGE)) {
std::string type;
GE_CHK_STATUS_RET(GetOriginalType(node, type), "Get node type failed.");
if ((type != MERGE) && (type != REFMERGE)) {
continue;
}



+ 4
- 2
ge/graph/passes/next_iteration_pass.cc View File

@@ -101,7 +101,8 @@ Status NextIterationPass::FindWhileGroups() {
const std::string &frame_name = loop_group_iter.first;
for (const auto &enter_node : loop_group_iter.second->enter_nodes) {
for (const auto &out_node : enter_node->GetOutAllNodes()) {
const string &type = out_node->GetType();
std::string type;
GE_CHK_STATUS_RET(GetOriginalType(out_node, type), "Get node type failed.");
if ((type != MERGE) && (type != REFMERGE)) {
continue;
}
@@ -310,7 +311,8 @@ Status NextIterationPass::FindTargetNode(const NodePtr &node, const std::string
}

for (const auto &tmp_node : nodes) {
const std::string type = tmp_node->GetType();
std::string type;
GE_CHK_STATUS_RET(GetOriginalType(tmp_node, type), "Get node type failed.");
if ((target_type == LOOPCOND) && (type == target_type)) {
target_node = tmp_node;
break;


+ 7
- 1
ge/graph/passes/pass_utils.cc View File

@@ -35,9 +35,9 @@
#include "graph/utils/op_desc_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/utils/type_utils.h"
#include "utils/node_utils.h"

namespace ge {

Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
std::vector<GeTensorPtr> &v_output, const bool scalar_output) {
Status ret = SUCCESS;
@@ -246,6 +246,12 @@ NodePtr PassUtils::GetInDataNode(const ConstNodePtr &node, int index) {
return src_node;
}

NodePtr PassUtils::GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index) {
auto src_node = GetInDataNode(node, index);

return NodeUtils::GetInNodeCrossSubgraph(src_node);
}
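What the new helper adds over GetInDataNode: when the producer found at the given index is a Data node inside a subgraph, NodeUtils::GetInNodeCrossSubgraph walks out to the real producer in the parent graph. A simplified model of those assumed semantics:

struct FakeNode {
  bool is_subgraph_data = false;     // Data node acting as a subgraph input
  FakeNode *parent_input = nullptr;  // corresponding producer in the parent graph
};

FakeNode *GetInNodeCrossSubgraph(FakeNode *src) {
  while (src != nullptr && src->is_subgraph_data) {
    src = src->parent_input;  // hop across the subgraph boundary
  }
  return src;
}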

bool PassUtils::IsNeedTrainIteFlowCtrl(const ComputeGraphPtr &compute_graph) {
if (compute_graph == nullptr) {
return false;


+ 2
- 0
ge/graph/passes/pass_utils.h View File

@@ -30,6 +30,8 @@ class PassUtils {

static NodePtr GetInDataNode(const ConstNodePtr &node, int index);

static NodePtr GetInNodeCrossSubgraphByIndex(const ConstNodePtr &node, int index);

static bool IsConstant(const ConstNodePtr &node);

static Status SetOutNodeWeight(const OutDataAnchorPtr &out_data_anchor, const NodePtr &src_node);


+ 1
- 1
ge/graph/passes/subexpression_migration_pass.cc View File

@@ -279,7 +279,7 @@ Status SubexpressionMigrationPass::GraphNodeMigration(const ComputeGraphPtr &gra
const auto &in_anchor = in_anchors.at(i);
const auto &base_node = in_anchor->GetOwnerNode();
GELOGD("Get Data direct node: %s", base_node->GetName().c_str());
if (!base_node->GetHostNode()) {
if (!base_node->GetHostNode() || base_node->GetType() == SWITCH) {
continue;
}



+ 8
- 2
ge/graph/passes/switch_dead_branch_elimination.cc View File

@@ -94,6 +94,12 @@ Status SwitchDeadBranchElimination::DeleteSwitchNode(NodePtr &node, NodePtr &pre
GELOGE(FAILED, "parameter is null.");
return FAILED;
}

// If the two nodes aren't in the same graph, get the node's direct in_node instead of pred_node.
if (node->GetOwnerComputeGraph() != pred_node->GetOwnerComputeGraph()) {
pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
}

// link pred's in control nodes to switch
if (GraphUtils::CopyInCtrlEdges(pred_node, node) != GRAPH_SUCCESS) {
return FAILED;
@@ -131,7 +137,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
return SUCCESS;
}

auto pred_node = PassUtils::GetInDataNode(node, kPredInputIndex);
auto pred_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kPredInputIndex);
if (pred_node == nullptr) {
GELOGD("[%s] Pred input is null.", node->GetName().c_str());
return SUCCESS;
@@ -143,7 +149,7 @@ Status SwitchDeadBranchElimination::Run(NodePtr &node) {
return SUCCESS;
}

auto input_node = PassUtils::GetInDataNode(node, kDataInputIndex);
auto input_node = PassUtils::GetInNodeCrossSubgraphByIndex(node, kDataInputIndex);
if (input_node == nullptr) {
GELOGD("[%s] Data input is null.", node->GetName().c_str());
return SUCCESS;


+ 2
- 0
ge/graph/passes/switch_to_stream_switch_pass.cc View File

@@ -448,6 +448,8 @@ Status SwitchToStreamSwitchPass::CombineSwitchNode(const ComputeGraphPtr &graph)

// select first stream_switch
NodePtr stream_switch = switch_list.front();
// set stream_label
GE_CHK_STATUS_RET(SetStreamLabel(stream_switch, cast_node->GetName()), "Set stream label failed.");
OpDescPtr switch_desc = stream_switch->GetOpDesc();
GE_CHECK_NOTNULL(switch_desc);
switch_desc->SetName(CheckDuplicateName(cond_group + "/" + STREAMSWITCH + (true_branch_flag ? "_t" : "_f")));


+ 2
- 2
ge/graph/preprocess/graph_preprocess.cc View File

@@ -1772,8 +1772,8 @@ Status GraphPrepare::CheckUserInput(const std::vector<GeTensor> &user_input) {
if (dim < UNKNOWN_DIM_NUM) {
std::string situation = "data dim[" + std::to_string(i) + "][" + std::to_string(dim) + "]" ;
std::string reason = "it needs to be >= -2";
REPORT_INPUT_ERROR(
"E19025", std::vector<std::string>({"situation", "reason"}),std::vector<std::string>({situation, reason}));
REPORT_INPUT_ERROR("E19025", std::vector<std::string>({"situation", "reason"}),
std::vector<std::string>({situation, reason}));
GELOGE(GE_GRAPH_INIT_FAILED, "[Check][InputDim]data dim %zu is not supported, need >= -2, real:%ld.", i, dim);
return GE_GRAPH_INIT_FAILED;
}


+ 2
- 1
ge/graph/preprocess/insert_op/ge_aipp_op.cc View File

@@ -428,7 +428,8 @@ Status AippOp::ConvertRelatedInputNameToRank() {
if (!convert_flag) {
string error_msg = "Top name " + related_input_name + "convert rank failed, Please"
" ensure top name in aipp config is the top name of data node.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID;
}



+ 9
- 5
ge/graph/preprocess/insert_op/util_insert_aipp_op.cc View File

@@ -124,13 +124,15 @@ Status InsertNewOpUtil::CheckInputNamePositionNotRepeat() {
if (another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_name and related_input_rank!"
" Please ensure param is the same with the first aipp config(related_input_name).";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID;
}
if (item->related_input_name() == another_item->related_input_name()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_name"
" param is different in different aipp config.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID;
}
}
@@ -150,13 +152,15 @@ Status InsertNewOpUtil::CheckInputRankPositionNoRepeat() {
if (!another_item->related_input_name().empty()) {
string error_msg = "Can not both set related_input_rank and related_input_name!"
" Please ensure param is the same with the first aipp config(related_input_rank).";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID;
}
if (item->related_input_rank() == another_item->related_input_rank()) {
string error_msg = "Can not insert aipp to the same postion! Please ensure related_input_rank"
" param is different in different aipp config.";
GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error_msg.c_str());
GELOGE(PARAM_INVALID, "[Check][InputParam]%s", error_msg.c_str());
REPORT_INPUT_ERROR("E19021", std::vector<std::string>({"reason"}), std::vector<std::string>({error_msg}));
return PARAM_INVALID;
}
}
@@ -212,7 +216,7 @@ Status InsertNewOpUtil::CheckGraph(const ComputeGraphPtr &graph) {
}
}
}
GE_CHK_LOG_AND_ERRORMSG((aippNodes.size() == 0) || (aippNodes.size() == next_nodes_cnt),
PARAM_INVALID,
"Can not config part of outputs of Data node to support AIPP, config all "
"of the outputs of Data to support AIPP, or config none of them");


+ 3
- 2
ge/host_cpu_engine/CMakeLists.txt View File

@@ -3,6 +3,7 @@ set(PROTO_LIST
)

protobuf_generate(ge PROTO_SRCS PROTO_HDRS ${PROTO_LIST})
protobuf_generate(ge_atcstub PROTO_ATCSTUB_SRCS PROTO_ATCSTUB_HDRS ${PROTO_LIST})

set(SRC_LIST
"engine/host_cpu_engine.cc"
@@ -61,7 +62,7 @@ target_link_libraries(host_cpu_engine PRIVATE
)

############ atcstub/libhost_cpu_engine.so ############
add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_HDRS})
add_library(atc_host_cpu_engine SHARED ${SRC_LIST} ${PROTO_ATCSTUB_HDRS})

target_compile_options(atc_host_cpu_engine PRIVATE
-Werror
@@ -84,7 +85,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE
${METADEF_DIR}/inc/external
${METADEF_DIR}/inc/external/graph
${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/proto/ge
${CMAKE_BINARY_DIR}/proto/ge_atcstub
#### yellow zone ####
${GE_CODE_DIR}/../inc
#### blue zone ####


+ 2
- 1
ge/host_kernels/gather_v2_kernel.cc View File

@@ -407,7 +407,8 @@ Status GatherV2Kernel::Compute(const OpDescPtr op_desc_ptr, const vector<ConstGe
// check input data type
auto x_data_type = tensor0->GetTensorDesc().GetDataType();
if (supported_type.find(x_data_type) == supported_type.end()) {
GELOGI("GatherV2Kernel does not support this Data type:%s.", TypeUtils::DataTypeToSerialString(x_data_type).c_str());
GELOGI("GatherV2Kernel does not support this Data type:%s.",
TypeUtils::DataTypeToSerialString(x_data_type).c_str());
return NOT_CHANGED;
}
// calc output shape


+ 2
- 1
ge/hybrid/executor/hybrid_execution_context.h View File

@@ -68,9 +68,10 @@ struct GraphExecutionContext {
DumpProperties dump_properties;
bool trace_enabled = false;
bool dump_enabled = false;
std::atomic_bool is_eos_;
std::atomic_bool is_eos_{false};
long profiling_level = 0;
long iteration = 0;
void *global_step = nullptr;

private:
Status status = SUCCESS;
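The {false} initializer added to is_eos_ is a real fix, not style: before C++20, std::atomic_bool's default constructor leaves the value uninitialized, so reading a default-constructed member is undefined behavior. Minimal repro of the difference:

#include <atomic>
#include <cstdio>

struct Ctx {
  std::atomic_bool is_eos_{false};  // after the fix: deterministic initial value
  // std::atomic_bool is_eos_;      // before: indeterminate until first store (pre-C++20)
};

int main() {
  Ctx ctx;
  std::printf("%d\n", ctx.is_eos_.load());  // always prints 0 now
}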


+ 11
- 10
ge/hybrid/executor/hybrid_model_async_executor.cc View File

@@ -46,10 +46,6 @@ void HybridModelAsyncExecutor::SetModelId(uint32_t model_id) {
model_id_ = model_id;
}

void HybridModelAsyncExecutor::SetModelName(const string &model_name) {
om_name_ = model_name;
}

Status HybridModelAsyncExecutor::EnqueueData(const shared_ptr<InputDataWrapper> &data) {
GE_CHK_STATUS_EXEC(data_inputer_->Push(data), return domi::DATA_QUEUE_ISFULL,
"Data queue is full, please call again later, model_id %u ", model_id_);
@@ -67,6 +63,7 @@ Status HybridModelAsyncExecutor::Start(const std::shared_ptr<ModelListener> &lis
future_ = std::async(std::launch::async, [&]() -> Status {
GetThreadLocalContext() = *executor_->GetContext()->ge_context;
GetContext().SetSessionId(executor_->GetContext()->session_id);
GetContext().SetContextId(executor_->GetContext()->context_id);
return RunInternal();
});

@@ -105,7 +102,7 @@ Status HybridModelAsyncExecutor::Init() {
executor_ = std::unique_ptr<HybridModelExecutor>(new(std::nothrow) HybridModelExecutor(model_, device_id_, stream_));
GE_CHECK_NOTNULL(executor_);
GE_CHK_STATUS_RET(executor_->Init(), "Failed to init hybrid engine");
GE_CHK_STATUS_RET(DumpOpDebug(),"Dump op debug failed in hybrid engine");
GE_CHK_STATUS_RET(DumpOpDebug(), "Dump op debug failed in hybrid engine");

GELOGI("HybridModel stage nums:%zu", model_->GetRootGraphItem()->NumGroups());
if (model_->GetRootGraphItem()->NumGroups() >= kMinimumPiplineStages) {
@@ -136,12 +133,14 @@ Status HybridModelAsyncExecutor::RunInternal() {
GE_MAKE_GUARD(not_used_var, [&] { GE_CHK_RT(rtDeviceReset(device_id)); });

while (run_flag_) {
// the model has not actually started running until data is received
SetRunningFlag(false);
std::shared_ptr<InputDataWrapper> data_wrapper;
Status ret = data_inputer_->Pop(data_wrapper);
if (data_wrapper == nullptr || ret != SUCCESS) {
GELOGI("data_wrapper is null!, ret = %u", ret);
continue;
}
// the model actually starts running now
SetRunningFlag(true);
GE_IF_BOOL_EXEC(data_wrapper == nullptr || ret != SUCCESS, GELOGI("data_wrapper is null!, ret = %u", ret);
continue);

GELOGI("Getting the input data, model_id:%u", model_id_);
GE_IF_BOOL_EXEC(!run_flag_, break);
@@ -166,6 +165,7 @@ Status HybridModelAsyncExecutor::RunInternal() {
} else {
GELOGI("HybridModel will execute in singleline mode");
ge::GetContext().SetSessionId(executor_->GetContext()->session_id);
ge::GetContext().SetContextId(executor_->GetContext()->context_id);
ret = executor_->Execute(args);
}
ret = HandleResult(ret, current_data.index, args, data_wrapper->GetOutput());
@@ -176,7 +176,8 @@ Status HybridModelAsyncExecutor::RunInternal() {

RECORD_MODEL_EXECUTION_EVENT(executor_->GetContext(), "[RunInternal] [iteration = %d] End", iterator_count_);
iterator_count_++;
GELOGI("run iterator count is %lu", iterator_count_);
SetRunningFlag(false);
GELOGI("run iterator count is %lu, model_id:%u", iterator_count_, model_id_);
}

CsaInteract::GetInstance().WriteInternalErrorCode();
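The SetRunningFlag(false/true) pair above brackets the blocking Pop, so GetRunningFlag() can distinguish "idle, waiting for input" from "executing an iteration". The window, reduced to a schematic (queue and payload types left generic):

#include <atomic>

std::atomic<bool> running{false};

template <typename Queue, typename Fn>
void RunLoop(Queue &queue, Fn execute) {
  for (;;) {
    running = false;          // idle: blocked on Pop, nothing in flight
    auto data = queue.Pop();  // assumed blocking pop, as with DataInputer
    if (!data) {
      continue;               // bad pop: stay idle and wait again
    }
    running = true;           // an iteration is actually executing now
    execute(*data);
  }
}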


+ 8
- 3
ge/hybrid/executor/hybrid_model_async_executor.h View File

@@ -51,12 +51,16 @@ class HybridModelAsyncExecutor {

void SetModelId(uint32_t model_id);

void SetModelName(const string &model_name);

Status Stop();

Status EnqueueData(const std::shared_ptr<InputDataWrapper> &data);

uint32_t GetDataInputerSize() { return data_inputer_->Size(); }

bool GetRunningFlag() const { return running_flag_; }

void SetRunningFlag(bool flag) { running_flag_ = flag; }

private:
Status InitInputDesc();

@@ -86,6 +90,8 @@ class HybridModelAsyncExecutor {
uint32_t device_id_ = 0U;
uint32_t model_id_ = 0U;
std::atomic_bool run_flag_;
// check whether model is running with data
bool running_flag_ = false;
std::unique_ptr<DataInputer> data_inputer_;
std::unique_ptr<HybridModelExecutor> executor_;
std::unique_ptr<HybridModelPipelineExecutor> pipe_executor_;
@@ -97,7 +103,6 @@ class HybridModelAsyncExecutor {
std::map<uint32_t, GeTensorDescPtr> input_tensor_desc_;
std::vector<bool> is_input_dynamic_;
std::shared_ptr<ModelListener> listener_;
string om_name_;
DataDumper data_dumper_;
bool is_op_debug_reg_ = false;
OpdebugRegister op_debug_register_;


+ 15
- 1
ge/hybrid/executor/hybrid_model_executor.cc View File

@@ -46,7 +46,10 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
GELOGD("Start to execute model.");
auto root_graph_item = model_->GetRootGraphItem();
GE_CHECK_NOTNULL(root_graph_item);

if (context_.global_step != nullptr) {
GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
}
SubgraphExecutor executor(model_->GetRootGraphItem(), &context_);
auto ret = ExecuteGraphInternal(executor, args);
Cleanup();
@@ -98,6 +101,7 @@ Status HybridModelExecutor::InitExecutionContext() {
GE_CHK_RT_RET(rtCtxCreate(&context_.rt_gen_context, RT_CTX_GEN_MODE, 0));
GE_CHK_RT_RET(rtCtxSetCurrent(context_.rt_context));

context_.global_step = model_->GetGlobalStep();
context_.stream = stream_;
context_.model = model_;
context_.is_eos_ = false;
@@ -130,6 +134,16 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
string ctx_id = std::to_string(context.context_id);
RuntimeInferenceContext::DestroyContext(ctx_id);
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
RuntimeInferenceContext *ctx = nullptr;
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
for (auto &host_tensor : context.model->GetHostTensors()) {
auto node_id = host_tensor.first;
for (const auto &output_idx_and_tensor : host_tensor.second) {
auto output_idx = output_idx_and_tensor.first;
GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
}
}
return SUCCESS;
}
} // namespace hybrid
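ResetExecutionContext now also seeds the fresh RuntimeInferenceContext with every constant host tensor recorded in the model, keyed by node id and output index, so value-dependent shape inference can read those values without re-running the producing nodes. The preload, reduced to its essentials (container types simplified):

#include <cstdint>
#include <map>
#include <utility>
#include <vector>

struct Tensor {
  std::vector<unsigned char> bytes;
  Tensor Clone() const { return *this; }  // deep copy, as in the diff
};
using HostTensors = std::map<int64_t, std::vector<std::pair<int, Tensor>>>;

void Preload(const HostTensors &host_tensors,
             std::map<std::pair<int64_t, int>, Tensor> &ctx_cache) {
  for (const auto &per_node : host_tensors) {
    for (const auto &idx_and_tensor : per_node.second) {
      // key = (node_id, output index), mirroring ctx->SetTensor(node_id, output_idx, ...)
      ctx_cache[{per_node.first, idx_and_tensor.first}] = idx_and_tensor.second.Clone();
    }
  }
}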


+ 10
- 0
ge/hybrid/executor/hybrid_model_pipeline_executor.cc View File

@@ -38,6 +38,16 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
string ctx_id = std::to_string(context.context_id);
RuntimeInferenceContext::DestroyContext(ctx_id);
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
RuntimeInferenceContext *ctx = nullptr;
GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
for (auto &host_tensor : context.model->GetHostTensors()) {
auto node_id = host_tensor.first;
for (const auto &output_idx_and_tensor : host_tensor.second) {
auto output_idx = output_idx_and_tensor.first;
GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone());
}
}
return SUCCESS;
}



+ 6
- 4
ge/hybrid/executor/node_state.cc View File

@@ -35,12 +35,14 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item(
node_item.NodeName().c_str(),
this->num_pending_shapes_);

for (int i = 0; i < node_item.num_inputs; ++i){
input_tensor_desc.emplace_back(*node_item.MutableInputDesc(i));
input_tensor_desc.resize(node_item.num_inputs);
for (int i = 0; i < node_item.num_inputs; ++i) {
node_item.GetInputDesc(i, input_tensor_desc[i]);
}

for (int i = 0; i < node_item.num_outputs; ++i){
output_tensor_desc.emplace_back(*node_item.MutableOutputDesc(i));
output_tensor_desc.resize(node_item.num_outputs);
for (int i = 0; i < node_item.num_outputs; ++i) {
node_item.GetOutputDesc(i, output_tensor_desc[i]);
}
}



+ 2
- 2
ge/hybrid/executor/subgraph_executor.cc View File

@@ -227,6 +227,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
if (node_item.is_dynamic) {
auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
GetContext().SetSessionId(context_->session_id);
GetContext().SetContextId(context_->context_id);
GE_CHK_STATUS_RET_NOLOG(InferShape(shape_inference_engine_.get(), *p_node_state));
return PrepareForExecution(context_, *p_node_state);
});
@@ -273,10 +274,8 @@ Status SubgraphExecutor::PrepareNodes(int group) {
}

Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
GetContext().SetSessionId(context_->context_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
"[%s] Failed to InferShape.", node_state.GetName().c_str());
GetContext().SetSessionId(context_->session_id);
HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
"[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
return SUCCESS;
@@ -345,6 +344,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
GELOGD("[%s] Start to schedule prepare workers.", graph_item_->GetName().c_str());
auto prepare_future = std::async(std::launch::async, [&]() -> Status {
GetContext().SetSessionId(context_->session_id);
GetContext().SetContextId(context_->context_id);
auto ret = PrepareNodes(group);
ready_queue_.Push(nullptr);
return ret;


+ 15
- 16
ge/hybrid/executor/worker/execution_engine.cc View File

@@ -206,37 +206,35 @@ Status NodeDoneCallback::DumpDynamicNode() {
return PARAM_INVALID;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(graph_context_);
const HybridModel *model = graph_context_->model;
GE_CHECK_NOTNULL(model);
std::string dynamic_model_name = model->GetModelName();
std::string dynamic_om_name = model->GetOmName();
uint32_t model_id = model->GetModelId();
if (!context_->GetDumpProperties().IsLayerNeedDump(dynamic_model_name, dynamic_om_name, op_desc->GetName())) {
GELOGI("[%s] is not in dump list, no need dump", op_desc->GetName().c_str());
return SUCCESS;
}
dump_op_.SetDynamicModelInfo(dynamic_model_name, dynamic_om_name, model_id);

auto stream = context_->GetStream();
vector<uintptr_t> input_addrs;
vector<uintptr_t> output_addrs;
for (int i = 0; i < context_->NumInputs(); i++) {
auto tensor_value = context_->GetInput(i);
GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
uint64_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
uintptr_t input_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
input_addrs.emplace_back(input_addr);
}
for (int j = 0; j < context_->NumOutputs(); j++) {
auto tensor_value = context_->GetOutput(j);
GE_CHK_BOOL_RET_STATUS(tensor_value != nullptr, PARAM_INVALID, "Tensor value is nullptr");
uint64_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
uintptr_t output_addr = reinterpret_cast<uintptr_t>(tensor_value->GetData());
output_addrs.emplace_back(output_addr);
}

dump_op_.SetDumpInfo(context_->GetDumpProperties(), op_desc, input_addrs, output_addrs, stream);

GE_CHECK_NOTNULL(graph_context_);
const HybridModel *model = graph_context_->model;
GE_CHECK_NOTNULL(model);
std::string dynamic_model_name = model->GetModelName();
uint32_t model_id = model->GetModelId();
dump_op_.SetDynamicModelInfo(dynamic_model_name, model_id);

void *global_step = nullptr;
TensorValue *varible_global_step = context_->GetVariable(NODE_NAME_GLOBAL_STEP);
if (varible_global_step != nullptr) {
global_step = const_cast<void *>(varible_global_step->GetData());
}

void *loop_per_iter = nullptr;
TensorValue *varible_loop_per_iter = context_->GetVariable(NODE_NAME_FLOWCTRL_LOOP_PER_ITER);
if (varible_loop_per_iter != nullptr) {
@@ -248,6 +246,7 @@ Status NodeDoneCallback::DumpDynamicNode() {
if (varible_loop_cond != nullptr) {
loop_cond = const_cast<void *>(varible_loop_cond->GetData());
}
void *global_step = context_->GetExecutionContext()->global_step;
dump_op_.SetLoopAddr(global_step, loop_per_iter, loop_cond);

GE_CHK_STATUS_RET(dump_op_.LaunchDumpOp(), "Failed to launch dump op in hybrid model");
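On the uint64_t to uintptr_t change above: the address vectors are declared as vector<uintptr_t>, and uintptr_t is the integer type the standard guarantees can round-trip a pointer, which uint64_t is not on every platform. The corrected shape of the cast:

#include <cstdint>
#include <vector>

void CollectAddr(std::vector<uintptr_t> &addrs, const void *data) {
  addrs.emplace_back(reinterpret_cast<uintptr_t>(data));  // cast type matches element type
}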


+ 27
- 5
ge/hybrid/hybrid_davinci_model.cc View File

@@ -19,6 +19,7 @@
#include "hybrid/model/hybrid_model.h"
#include "hybrid/executor/hybrid_model_async_executor.h"
#include "hybrid/node_executor/node_executor.h"
#include "graph/manager/graph_manager_utils.h"

namespace ge {
namespace hybrid {
@@ -76,9 +77,8 @@ class HybridDavinciModel::Impl {
executor_.SetDeviceId(device_id);
}

void SetModelName(const string &model_name) {
model_.SetModelName(model_name);
executor_.SetModelName(model_name);
void SetOmName(const string &model_name) {
model_.SetOmName(model_name);
}

uint64_t GetSessionId() {
@@ -108,6 +108,17 @@ class HybridDavinciModel::Impl {
model_.SetModelDescVersion(is_new_model_desc);
}

uint32_t GetDataInputerSize() { return executor_.GetDataInputerSize(); }

bool GetRunningFlag() const { return executor_.GetRunningFlag(); }

Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
auto listener = dynamic_cast<RunAsyncListener *>(listener_.get());
GE_CHECK_NOTNULL(listener);
listener->SetCallback(callback);
return SUCCESS;
}

private:
std::shared_ptr<ModelListener> listener_;
HybridModel model_;
@@ -181,9 +192,9 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}
}

void HybridDavinciModel::SetModelName(const string &model_name) {
void HybridDavinciModel::SetOmName(const string &om_name) {
if (impl_ != nullptr) {
impl_->SetModelName(model_name);
impl_->SetOmName(om_name);
}
}

@@ -222,5 +233,16 @@ uint64_t HybridDavinciModel::GetSessionId() {
GE_CHECK_NOTNULL(impl_);
return impl_->GetSessionId();
}

uint32_t HybridDavinciModel::GetDataInputerSize() {
GE_CHECK_NOTNULL(impl_);
return impl_->GetDataInputerSize();
}

bool HybridDavinciModel::GetRunningFlag() const { return impl_->GetRunningFlag(); }

Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
return impl_->SetRunAsyncListenerCallback(callback);
}
} // namespace hybrid
} // namespace ge

+ 7
- 1
ge/hybrid/hybrid_davinci_model.h View File

@@ -57,7 +57,7 @@ class HybridDavinciModel {

void SetDeviceId(uint32_t device_id);

void SetModelName(const string &model_name);
void SetOmName(const string &om_name);

uint64_t GetSessionId();

@@ -74,6 +74,12 @@ class HybridDavinciModel {

void SetModelDescVersion(bool is_new_model_desc);

uint32_t GetDataInputerSize();

bool GetRunningFlag() const;

Status SetRunAsyncListenerCallback(const RunAsyncCallback &callback);

private:
HybridDavinciModel() = default;
class Impl;


+ 13
- 1
ge/hybrid/hybrid_davinci_model_stub.cc View File

@@ -61,13 +61,17 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
}

void HybridDavinciModel::SetModelName(const string &model_name) {
void HybridDavinciModel::SetOmName(const string &om_name) {
}

uint64_t HybridDavinciModel::GetSessionId() {
return 0;
}

uint32_t HybridDavinciModel::GetDataInputerSize() {
return 0;
}

Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
return UNSUPPORTED;
}
@@ -87,5 +91,13 @@ Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &i

void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
}

bool HybridDavinciModel::GetRunningFlag() const {
return false;
}

Status HybridDavinciModel::SetRunAsyncListenerCallback(const RunAsyncCallback &callback) {
return UNSUPPORTED;
}
} // namespace hybrid
} // namespace ge

+ 20
- 0
ge/hybrid/model/hybrid_model.cc View File

@@ -357,5 +357,25 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const {

return GetVariable(node->GetName());
}

const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const {
return host_tensors_;
}

void *HybridModel::GetGlobalStep() const {
if (global_step_ == nullptr) {
return nullptr;
}
return global_step_->GetData();
}

TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const {
auto it = weight_buffer_map_.find(subgraph_name);
if (it == weight_buffer_map_.end()) {
GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str());
return nullptr;
}
return it->second.get();
}
} // namespace hybrid
} // namespace ge
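GetModelWeight pairs with the weight_buffer_ to weight_buffer_map_ change in hybrid_model.h below: weights are now held per subgraph and looked up by subgraph name, with nullptr on a miss, so each subgraph can own exactly the weight block it needs. The lookup reduced to its essentials (TensorBuffer stand-in):

#include <map>
#include <memory>
#include <string>
#include <vector>

using TensorBuffer = std::vector<unsigned char>;  // stand-in for the GE TensorBuffer

TensorBuffer *GetModelWeight(const std::map<std::string, std::unique_ptr<TensorBuffer>> &weights,
                             const std::string &subgraph_name) {
  auto it = weights.find(subgraph_name);
  return it == weights.end() ? nullptr : it->second.get();
}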

+ 11
- 3
ge/hybrid/model/hybrid_model.h View File

@@ -45,6 +45,8 @@ class HybridModel {
return root_runtime_param_.session_id;
}

void *GetGlobalStep() const;

GeModelPtr GetGeModel(const NodePtr &node) const;

NodeItem *MutableNodeItem(const NodePtr &node);
@@ -69,8 +71,8 @@ class HybridModel {
model_id_ = model_id;
}

void SetModelName(const string &model_name) {
om_name_ = model_name;
void SetOmName(const string &om_name) {
om_name_ = om_name;
}

const std::string &GetOmName() const {
@@ -91,6 +93,10 @@ class HybridModel {

TensorValue* GetTensor(const NodePtr &node) const;

TensorBuffer* GetModelWeight(const std::string &subgraph_name) const;

const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;

const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;

const GraphItem *GetRootGraphItem() const;
@@ -135,6 +141,7 @@ class HybridModel {
std::string model_name_;
GeRootModelPtr ge_root_model_;
std::map<uint32_t, NodeItem *> input_nodes_;
ComputeGraphPtr root_graph_;
std::map<std::string, NodePtr> device_variable_nodes_; //lint !e148
std::map<std::string, NodePtr> host_variable_nodes_; //lint !e148
std::map<std::string, std::unique_ptr<TensorValue>> variable_tensors_;
@@ -145,6 +152,7 @@ class HybridModel {
std::unique_ptr<GraphItem> root_graph_item_;
std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_;

bool is_new_model_desc_ = false; // support aipp
bool is_single_op_ = false;
@@ -153,10 +161,10 @@ class HybridModel {
uint32_t device_id_ = 0;
uint32_t model_id_ = 0;
uint8_t *var_mem_base_ = nullptr;
std::unique_ptr<TensorBuffer> weight_buffer_;
std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_;
RuntimeParam root_runtime_param_;
string om_name_;
std::unique_ptr<TensorBuffer> global_step_;
};
} // namespace hybrid
} // namespace ge


+ 306
- 82
ge/hybrid/model/hybrid_model_builder.cc View File

@@ -130,21 +130,24 @@ HybridModelBuilder::HybridModelBuilder(HybridModel &hybrid_model)

Status HybridModelBuilder::Build() {
GE_CHK_STATUS_RET(ValidateParams(), "Failed to validate GeRootModel");
hybrid_model_.model_name_ = ge_root_model_->GetRootGraph()->GetName();
hybrid_model_.model_name_ = ge_root_model_->GetModelName();
GELOGI("[%s] Start to build hybrid model.", GetGraphName());
GE_CHK_STATUS_RET(InitRuntimeParams(), "[%s] Failed to InitRuntimeParams", GetGraphName());
GE_CHK_STATUS_RET(RecoverGraphUnknownFlag(), "[%s] Failed to RecoverGraphUnknownFlag", GetGraphName());
GE_CHK_STATUS_RET(IndexSpecialNodes(), "[%s] Failed to index nodes", GetGraphName());
GE_CHK_STATUS_RET(IndexTaskDefs(), "[%s] Failed to index task defs", GetGraphName());
GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
GE_CHK_STATUS_RET(LoadGraph(), "[%s] Failed to load graph", GetGraphName());
GE_CHK_STATUS_RET(AssignUninitializedConstantOps(), "[%s] Failed to assign uninitialized constants", GetGraphName());
GE_CHK_STATUS_RET(TransAllVarData(), "[%s] Failed to trans all var data", GetGraphName());
GE_CHK_STATUS_RET(CopyVarData(), "[%s] Failed to copy var data", GetGraphName());
GE_CHK_STATUS_RET(InitModelMem(), "[%s] Failed to init memory", GetGraphName());
GE_CHK_STATUS_RET(InitWeights(), "[%s] Failed to init weights", GetGraphName());
GE_CHK_STATUS_RET(InitConstantOps(), "[%s] Failed to init constant op", GetGraphName());
GE_CHK_STATUS_RET(InitVariableTensors(), "[%s] Failed to init variables", GetGraphName());
GE_CHK_STATUS_RET(LoadTasks(), "[%s] Failed to load tasks", GetGraphName());
GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(),
"[%s] Failed to optimize dependencies for constant inputs",
GetGraphName());
GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
return SUCCESS;
}
@@ -255,9 +258,7 @@ Status HybridModelBuilder::GetOrCreateNodeItem(const NodePtr &node, NodeItem **n
(void) AttrUtils::SetBool(new_node->op_desc, kIsFirstNode, false);
(void) AttrUtils::SetBool(new_node->op_desc, kIsLastNode, false);

new_node->node_id = node_index;
new_node->op_desc->SetId(node_index);
node_index += 1;
new_node->node_id = static_cast<int>(new_node->op_desc->GetId());
NodeExecutorManager::ExecutorType executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
new_node->is_profiling_report = (executor_type == NodeExecutorManager::ExecutorType::AICORE) ||
(executor_type == NodeExecutorManager::ExecutorType::AICPU_TF) ||
@@ -271,18 +272,18 @@ Status HybridModelBuilder::ParseForceInfershapeNodes(const NodePtr &node, NodeIt
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
// result is intentionally ignored: if this attr is absent, the op does not need force infershape
(void)AttrUtils::GetBool(op_desc, kForceInfershape, node_item.is_need_force_infershape);
(void) AttrUtils::GetBool(op_desc, kForceInfershape, node_item.is_need_force_infershape);
GELOGD("node [%s] is need do infershape , flag is %d",
op_desc->GetName().c_str(),
node_item.is_need_force_infershape);
op_desc->GetName().c_str(),
node_item.is_need_force_infershape);
return SUCCESS;
}

Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies) {
std::set<NodePtr> dependent_input_nodes;
std::set<NodePtr> dependent_for_shape_inference;
std::set<NodePtr> dependent_for_execution;
auto &ge_node = node_item.node;
bool is_hccl_op =
NodeExecutorManager::GetInstance().ResolveExecutorType(*ge_node) == NodeExecutorManager::ExecutorType::HCCL;
bool is_hccl_op = node_item.IsHcclOp();

// The input tensors become valid after computation is done for parent nodes of type DEPEND_COMPUTE.
// Wait for these parent nodes before execution.
@@ -297,29 +298,15 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
auto src_node_item = MutableNodeItem(src_node);
GE_CHECK_NOTNULL(src_node_item);

if (is_hccl_op) {
GELOGD("[%s] Add input data dependent node [%s] due to engine type is HCCL",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str());
src_node_item->has_observer = true;
node_item.dependents_for_execution.emplace_back(src_node);
node_item.has_observer = true;
for (auto &dst_node : ge_node->GetOutNodes()) {
if (dst_node == nullptr) {
continue;
}

NodeItem *dst_node_item = nullptr;
GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(dst_node, &dst_node_item));
dst_node_item->dependents_for_execution.emplace_back(ge_node);
}
} else if (src_node_item->shape_inference_type == DEPEND_COMPUTE) {
GELOGD("[%s] Add input data dependent node [%s] due to inference type = DEPEND_COMPUTE",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str());

if (src_node_item->shape_inference_type == DEPEND_COMPUTE || is_hccl_op || src_node_item->IsHcclOp()) {
GELOGD("[%s](%s) Add input data dependent node [%s](%s), shape inference type = %d",
ge_node->GetName().c_str(),
ge_node->GetType().c_str(),
src_node->GetName().c_str(),
src_node->GetType().c_str(),
static_cast<int>(src_node_item->shape_inference_type));
src_node_item->has_observer = true;
node_item.dependents_for_execution.emplace_back(src_node);
dependent_for_execution.emplace(src_node);
}

if (src_node_item->shape_inference_type == DEPEND_SHAPE_RANGE) {
@@ -327,22 +314,29 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str());
src_node_item->has_observer = true;
dependent_input_nodes.emplace(src_node);
dependent_for_shape_inference.emplace(src_node);
}
}

for (const auto &src_node : ge_node->GetInControlNodes()) {
auto src_node_item = MutableNodeItem(src_node);
if ((src_node_item != nullptr) && (is_hccl_op || src_node_item->IsHcclOp())) {
GELOGD("[%s](%s) Add input control dependent node [%s](%s)",
ge_node->GetName().c_str(),
ge_node->GetType().c_str(),
src_node->GetName().c_str(),
src_node->GetType().c_str());
dependent_for_execution.emplace(src_node);
}
}

// cond or branch need to be prepared before the execution of IF or CASE
if (node_item.node_type == IF || node_item.node_type == STATELESSIF || node_item.node_type == CASE) {
const auto &in_anchor = ge_node->GetInDataAnchor(0);
GE_CHECK_NOTNULL(in_anchor);
const auto &peer_anchor = in_anchor->GetPeerOutAnchor();
GE_CHECK_NOTNULL(peer_anchor);
auto src_node = peer_anchor->GetOwnerNode();
auto src_node = NodeUtils::GetInDataNodeByIndex(*ge_node, 0); // cond input
GE_CHECK_NOTNULL(src_node);
auto src_node_item = MutableNodeItem(src_node);
GE_CHECK_NOTNULL(src_node_item);
src_node_item->has_observer = true;
node_item.dependents_for_execution.emplace_back(src_node);
dependent_for_execution.emplace(src_node);
GELOGD("[%s] Dependent added from %s for control op's cond/branch",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str());
@@ -366,24 +360,33 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
GE_CHECK_NOTNULL(src_node);
auto src_node_item = MutableNodeItem(src_node);
src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
src_node_item->has_observer = true;

dependent_input_nodes.emplace(src_node);
dependent_for_shape_inference.emplace(src_node);
host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);
GELOGD("[%s] Dependent added from output of [%s:%d]",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str(),
peer_out_anchor->GetIdx());
}

for (const auto &dep_node : dependent_input_nodes) {
GE_CHK_STATUS_RET(ParseDependentForFusedSubgraph(node_item, dependent_for_shape_inference));
for (const auto &dep_node : dependent_for_shape_inference) {
auto src_node_item = MutableNodeItem(dep_node);
GE_CHECK_NOTNULL(src_node_item);
src_node_item->has_observer = true;
node_item.dependents_for_shape_inference.emplace_back(dep_node);
}

GE_CHK_STATUS_RET(ParseDependentForFusedSubgraph(node_item));
for (const auto &dep_node : dependent_for_execution) {
auto src_node_item = MutableNodeItem(dep_node);
GE_CHECK_NOTNULL(src_node_item);
src_node_item->has_observer = true;
node_item.dependents_for_execution.emplace_back(dep_node);
}

return SUCCESS;
}

Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item) {
Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item, std::set<ge::NodePtr> &dependencies) {
if (node_item.fused_subgraph == nullptr) {
return SUCCESS;
}
@@ -413,17 +416,12 @@ Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item) {
node_item.NodeName().c_str(),
op_desc->GetName().c_str(),
src_node_item->NodeName().c_str());
src_node_item->has_observer = true;
src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());

auto &depends = node_item.dependents_for_shape_inference;
if (std::find(depends.begin(), depends.end(), src_node) == depends.end()) {
depends.emplace_back(src_node);
GELOGD("[%s] Dependent added from output of [%s:%d]",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str(),
peer_out_anchor->GetIdx());
}
dependencies.emplace(src_node);
GELOGD("[%s] Dependent added from output of [%s:%d]",
node_item.NodeName().c_str(),
src_node_item->NodeName().c_str(),
peer_out_anchor->GetIdx());
}

return SUCCESS;
@@ -617,9 +615,10 @@ Status HybridModelBuilder::MergeNetOutputNode(ComputeGraph &graph) {
return SUCCESS;
}

Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph) {
Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph) {
merged_graph = MakeShared<ComputeGraph>("MergedGraph");
for (const auto &node : root_graph.GetDirectNode()) {
merged_graph->SetGraphUnknownFlag(root_graph->GetGraphUnknownFlag());
for (const auto &node : root_graph->GetDirectNode()) {
GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
@@ -649,7 +648,7 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap
}
}
}
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, *merged_graph, *subgraph),
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraph(root_graph, merged_graph, *subgraph),
"[%s] Failed to merge subgraph.",
subgraph->GetName().c_str());
}
@@ -665,18 +664,19 @@ Status HybridModelBuilder::UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGrap
return a_level < b_level;
});

for (auto &remained_subgraph : root_graph.GetAllSubgraphs()) {
for (auto &remained_subgraph : root_graph->GetAllSubgraphs()) {
GELOGD("Adding subgraph [%s] to merged-graph.", remained_subgraph->GetName().c_str());
GE_CHK_GRAPH_STATUS_RET(merged_graph->AddSubgraph(remained_subgraph),
"Failed to add subgraph [%s]",
remained_subgraph->GetName().c_str());
remained_subgraph->SetParentGraph(merged_graph);
}

return SUCCESS;
}

Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph,
ComputeGraph &parent_graph,
Status HybridModelBuilder::UnfoldSubgraph(ComputeGraphPtr &root_graph,
ComputeGraphPtr &parent_graph,
ComputeGraph &sub_graph) {
auto parent_node = sub_graph.GetParentNode();
GE_CHECK_NOTNULL(parent_node);
@@ -705,15 +705,23 @@ Status HybridModelBuilder::UnfoldSubgraph(ComputeGraph &root_graph,
}
}

parent_graph.AddNode(sub_node);
if (!sub_node->GetOpDesc()->GetSubgraphInstanceNames().empty()) {
for (size_t i = 0; i < sub_node->GetOpDesc()->GetSubgraphInstanceNames().size(); ++i) {
auto sub_sub_graph = NodeUtils::GetSubgraph(*sub_node, i);
GE_CHECK_NOTNULL(sub_sub_graph);
sub_sub_graph->SetParentGraph(parent_graph);
}
}
parent_graph->AddNode(sub_node);
GELOGD("[%s::%s] added to parent graph: [%s].",
sub_graph.GetName().c_str(),
sub_node->GetName().c_str(),
parent_graph.GetName().c_str());
parent_graph->GetName().c_str());
sub_node->SetOwnerComputeGraph(parent_graph);
}

GELOGD("[%s] Done merging subgraph. remove it from root graph.", sub_graph.GetName().c_str());
root_graph.RemoveSubgraph(sub_graph.GetName());
root_graph->RemoveSubgraph(sub_graph.GetName());
return SUCCESS;
}

@@ -765,14 +773,28 @@ Status HybridModelBuilder::LoadGraph() {
GELOGI("Before merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
root_graph->GetDirectNodesSize(),
root_graph->GetAllNodesSize());
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(*root_graph, merged_graph), "Failed to unfold subgraphs.");
GE_CHK_GRAPH_STATUS_RET(UnfoldSubgraphs(root_graph, merged_graph), "Failed to unfold subgraphs.");
root_graph = std::move(merged_graph);
GELOGI("After merging subgraphs DirectNodesSize = %zu, GetAllNodesSize = %zu",
root_graph->GetDirectNodesSize(),
root_graph->GetAllNodesSize());
GE_DUMP(root_graph, "hybrid_merged_graph");
}

hybrid_model_.root_graph_ = root_graph;
// Reset node id by topological order across all subgraphs
int64_t index = 0;
for (const auto &node : root_graph->GetAllNodes()) {
GE_CHECK_NOTNULL(node);
auto parent_graph = node->GetOwnerComputeGraph();
// No need to update nodes in known-shaped subgraphs
if (parent_graph != nullptr && !parent_graph->GetGraphUnknownFlag()) {
continue;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
op_desc->SetId(index++);
}
GE_DUMP(root_graph, "hybrid_merged_graph");
GE_CHK_STATUS_RET(LoadDynamicSubgraph(*root_graph, true), "Failed to load root graph.");
GELOGD("Done loading root graph successfully.");
GE_CHK_STATUS_RET(hybrid_model_.root_graph_item_->GroupNodes(), "Failed to group nodes for root graph");
@@ -810,6 +832,7 @@ Status HybridModelBuilder::LoadGraph() {
}
}

GE_CHK_STATUS_RET(ParseDependentByParallelGroup(), "Failed to establish dependencies for hccl ops");
GELOGI("Done loading all subgraphs successfully.");
return SUCCESS;
}
@@ -1033,9 +1056,13 @@ Status HybridModelBuilder::InitWeights() {
GELOGI("Init weight mem successfully, weight base %p, weight size = %zu",
weight_base,
sub_weight_buffer->GetSize());
auto root_graph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph());
hybrid_model_.weight_buffer_map_.emplace(root_graph->GetName(),std::move(sub_weight_buffer));
for (auto &node : root_graph->GetDirectNode()) {
auto subgraph = GraphUtils::GetComputeGraph(subgraph_model.second->GetGraph());
if (subgraph != ge_root_model_->GetRootGraph()) {
subgraph = ge_root_model_->GetRootGraph()->GetSubgraph(subgraph_model.first);
}
GE_CHECK_NOTNULL(subgraph);
hybrid_model_.weight_buffer_map_.emplace(subgraph->GetName(), std::move(sub_weight_buffer));
for (auto &node : subgraph->GetDirectNode()) {
if (node->GetType() != CONSTANT) {
continue;
}
@@ -1075,25 +1102,41 @@ Status HybridModelBuilder::InitWeights() {
return SUCCESS;
}

Status HybridModelBuilder::LoadTask(NodeItem &node_item) {
auto &node_ptr = node_item.node;
GELOGD("[%s] Start to build kernel task", node_ptr->GetName().c_str());
auto load_ret = node_item.node_executor->LoadTask(hybrid_model_,
node_ptr,
node_item.kernel_task);
if (load_ret != UNSUPPORTED && load_ret != SUCCESS) {
GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str());
return load_ret;
}

GELOGD("[%s] Done loading task successfully.", node_ptr->GetName().c_str());
return SUCCESS;
}

Status HybridModelBuilder::LoadTasks() {
GE_CHK_STATUS_RET(CheckAicpuOpList(), "Check Aicpu op failed.");
std::map<int, std::map<std::string, NodeItem *>> ordered_partitioned_calls;
for (auto &it : hybrid_model_.node_items_) {
auto &node_item = it.second;
auto &node_ptr = node_item->node;
if (node_item->node_type == NETOUTPUT) {
continue;
}

GELOGD("[%s] Start to build kernel task", node_ptr->GetName().c_str());
auto load_ret = node_item->node_executor->LoadTask(hybrid_model_,
node_ptr,
node_item->kernel_task);
if (load_ret != UNSUPPORTED && load_ret != SUCCESS) {
GELOGE(load_ret, "[%s] Failed to load task", node_ptr->GetName().c_str());
return load_ret;
if (node_item->node_type == PARTITIONEDCALL) {
ordered_partitioned_calls[node_item->node_id][node_item->node_name] = node_item.get();
continue;
}
GE_CHK_STATUS_RET_NOLOG(LoadTask(*node_item));
}

GELOGD("[%s] Done loading task successfully.", node_ptr->GetName().c_str());
// HCCL operators need to be loaded in the same order across different processes
for (auto &it : ordered_partitioned_calls) {
for (auto &it2 : it.second) {
GE_CHK_STATUS_RET_NOLOG(LoadTask(*it2.second));
}
}

return SUCCESS;
@@ -1467,7 +1510,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
src_node->GetName().c_str(),
src_op_type.c_str());

if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) {
if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) {
continue;
}

@@ -1476,6 +1519,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
GELOGD("Got parent output index = %u", parent_index);
GE_CHECK_LE(parent_index, INT32_MAX);
node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node);
if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) {
known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node);
}
}

// Data nodes marked with REF_VAR_SRC_VAR_NAME
@@ -1541,6 +1587,10 @@ Status HybridModelBuilder::InitModelMem() {
}

runtime_param_.var_base = hybrid_model_.var_mem_base_;
auto allocator = NpuMemoryAllocator::GetAllocator();
GE_CHECK_NOTNULL(allocator);
hybrid_model_.global_step_ = TensorBuffer::Create(allocator, sizeof(int64_t));
GE_CHECK_NOTNULL(hybrid_model_.global_step_);
return SUCCESS;
}

@@ -1626,6 +1676,7 @@ Status HybridModelBuilder::LoadKnownShapedSubgraph(ComputeGraph &graph, NodeItem
auto temp_graph = MakeShared<ComputeGraph>("temp");
GE_CHECK_NOTNULL(temp_graph);
auto wrapper_node = temp_graph->AddNode(wrapper_op_desc);
wrapper_op_desc->SetId(parent_node_item->node_id);
GeModelPtr ge_model = subgraph_models_[subgraph_name];
GE_CHECK_NOTNULL(ge_model);
hybrid_model_.known_shape_sub_models_.emplace(wrapper_node, ge_model);
@@ -2011,5 +2062,178 @@ Status HybridModelBuilder::CheckAicpuOpList() {
"Launch check aicpu op type failed.");
return SUCCESS;
}

Status HybridModelBuilder::CollectParallelGroups(NodeItem *node_item) {
const auto &node = node_item->node;
auto executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node);
if (executor_type == NodeExecutorManager::ExecutorType::HCCL) {
int64_t parallel_group_val = -1;
if (AttrUtils::GetInt(node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) {
std::string parallel_group = std::to_string(parallel_group_val);
GELOGD("[%s] Got parallel group = [%s]", node_item->NodeName().c_str(), parallel_group.c_str());
parallel_group_to_nodes_[parallel_group].emplace(node_item);
std::set<std::string> group{parallel_group};
node_to_parallel_groups_[node_item].emplace(parallel_group);
}
} else if (executor_type == NodeExecutorManager::ExecutorType::COMPILED_SUBGRAPH) {
std::set<std::string> parallel_groups;
GELOGD("[%s] To collect parallel group for known-shaped subgraph", node_item->NodeName().c_str());
for (const auto &subgraph_name : node->GetOpDesc()->GetSubgraphInstanceNames()) {
GELOGD("[%s] Start to get parallel group from subgraph: %s",
node_item->NodeName().c_str(),
subgraph_name.c_str());
auto subgraph = hybrid_model_.root_graph_->GetSubgraph(subgraph_name);
GE_CHECK_NOTNULL(subgraph);
for (const auto &sub_node : subgraph->GetAllNodes()) {
int64_t parallel_group_val = -1;
if (AttrUtils::GetInt(sub_node->GetOpDesc(), ATTR_NAME_PARALLEL_GROUP, parallel_group_val)) {
std::string parallel_group = std::to_string(parallel_group_val);
GELOGD("[%s::%s] Got parallel group = %s",
subgraph_name.c_str(),
sub_node->GetName().c_str(),
parallel_group.c_str());
parallel_groups.emplace(parallel_group);
}
}
}

if (!parallel_groups.empty()) {
for (const auto &parallel_group : parallel_groups) {
parallel_group_to_nodes_[parallel_group].emplace(node_item);
GELOGD("[%s] has parallel group: %s", node_item->NodeName().c_str(), parallel_group.c_str());
}
node_to_parallel_groups_.emplace(node_item, std::move(parallel_groups));
}
}

return SUCCESS;
}

Status HybridModelBuilder::ParseDependentByParallelGroup() {
for (auto &it : hybrid_model_.node_items_) {
GE_CHK_STATUS_RET_NOLOG(CollectParallelGroups(it.second.get()));
}
for (const auto &it : node_to_parallel_groups_) {
auto node_item = it.first;
auto dst_executor_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*node_item->node);
for (const auto &parallel_group : it.second) {
auto &dependent_nodes = parallel_group_to_nodes_[parallel_group];
NodeItem *nearest_dep_node = nullptr;
int max_id = -1;
for (auto &dep_node : dependent_nodes) {
if (dep_node->node_id < node_item->node_id && dep_node->node_id > max_id) {
nearest_dep_node = dep_node;
max_id = dep_node->node_id;
}
}

if (nearest_dep_node != nullptr) {
GELOGD("[%s] Nearest node = [%s]", node_item->NodeName().c_str(), nearest_dep_node->NodeName().c_str());
auto src_engine_type = NodeExecutorManager::GetInstance().ResolveExecutorType(*nearest_dep_node->node);
if (src_engine_type == dst_executor_type) {
GELOGD("No need to add dependency for nodes with same executor type");
continue;
}
auto &deps = node_item->dependents_for_execution;
if (std::find(deps.begin(), deps.end(), nearest_dep_node->node) != deps.end()) {
GELOGD("%s->%s Already has dependency, skip it",
nearest_dep_node->node->GetName().c_str(),
node_item->NodeName().c_str());
continue;
}
nearest_dep_node->has_observer = true;
deps.emplace_back(nearest_dep_node->node);
GELOGD("Add dependency for nodes with the same parallel group[%s], src = [%s], dst = [%s]",
parallel_group.c_str(),
nearest_dep_node->NodeName().c_str(),
node_item->NodeName().c_str());
}
}
}
return SUCCESS;
}

Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() {
std::map<NodePtr, std::set<uint32_t>> converted;
for (auto &it : host_input_value_dependencies_) {
auto node_item = it.first;
std::map<NodeItem *, int> ref_counts;
bool changed = false;
for (auto output_idx_and_node : it.second) {
auto output_idx = output_idx_and_node.first;
auto src_node_item = output_idx_and_node.second;
++ref_counts[src_node_item];
NodePtr constant_node;
if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) {
constant_node = src_node_item->node;
GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str());
} else {
auto iter = known_subgraph_constant_output_refs_.find(src_node_item);
if (iter != known_subgraph_constant_output_refs_.end()) {
constant_node = iter->second[output_idx];
if (constant_node != nullptr) {
GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str());
}
}
}

if (constant_node == nullptr) {
GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str());
continue;
}

if (converted[constant_node].count(output_idx) == 0) {
GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx),
"[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str());
converted[constant_node].emplace(output_idx);
}

src_node_item->to_const_output_id_list.erase(output_idx);
--ref_counts[src_node_item];
changed = true;
}

if (changed) {
std::vector<NodePtr> depends_to_keep;
for (auto &ref_count_it : ref_counts) {
if (ref_count_it.second == 0) {
GELOGD("[%s] no longer depends on [%s] for shape inference",
node_item->NodeName().c_str(),
ref_count_it.first->NodeName().c_str());
} else {
depends_to_keep.emplace_back(ref_count_it.first->node);
}
}
node_item->dependents_for_shape_inference.swap(depends_to_keep);
}
}

return SUCCESS;
}
Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) {
auto tensor_value = hybrid_model_.GetTensor(node);
GE_CHECK_NOTNULL(tensor_value);
auto tensor_desc = node->GetOpDesc()->MutableOutputDesc(0);
GE_CHECK_NOTNULL(tensor_desc);
Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc));
int64_t tensor_size = -1;
GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size),
"[%s] Failed to get tensor size", node->GetName().c_str());
if (tensor_size > 0) {
auto copy_size = static_cast<size_t>(tensor_size);
GE_CHECK_GE(tensor_value->GetSize(), copy_size);
std::vector<uint8_t> buffer(copy_size);
GE_CHK_RT_RET(rtMemcpy(buffer.data(),
copy_size,
tensor_value->GetData(),
copy_size,
RT_MEMCPY_DEVICE_TO_HOST));
tensor.SetData(std::move(buffer));
GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size);
}

hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor));
return SUCCESS;
}
} // namespace hybrid
} // namespace ge
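
Note: ParseDependentByParallelGroup above applies a simple rule: within each parallel group, a node gains an execution dependency on the nearest node that precedes it in topological order (by node_id), unless both resolve to the same executor type. A reduced, self-contained sketch of that selection rule, with NodeItem stripped down to the id it actually uses:

    // Reduced illustration of the nearest-predecessor selection; "Item" is a
    // stand-in for NodeItem, keeping only the node_id consulted by the rule.
    #include <set>

    struct Item { int node_id; };

    Item *NearestDependency(const std::set<Item *> &group_nodes, const Item &dst) {
      Item *nearest = nullptr;
      int max_id = -1;
      for (auto *candidate : group_nodes) {
        if (candidate->node_id < dst.node_id && candidate->node_id > max_id) {
          nearest = candidate;  // latest node that still precedes dst
          max_id = candidate->node_id;
        }
      }
      return nearest;  // nullptr when dst is the first node of its group
    }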

+ 16
- 4
ge/hybrid/model/hybrid_model_builder.h View File

@@ -47,8 +47,8 @@ class HybridModelBuilder {
static Status HandleDtString(const GeTensor &tensor, void *var_addr);
static Status MergeInputNodes(ComputeGraph &compute_graph);
static Status MergeNetOutputNode(ComputeGraph &compute_graph);
static Status UnfoldSubgraphs(ComputeGraph &root_graph, ComputeGraphPtr &merged_graph);
static Status UnfoldSubgraph(ComputeGraph &root_graph, ComputeGraph &parent_graph, ComputeGraph &sub_graph);
static Status UnfoldSubgraphs(ComputeGraphPtr &root_graph, ComputeGraphPtr &merged_graph);
static Status UnfoldSubgraph(ComputeGraphPtr &root_graph, ComputeGraphPtr &parent_graph, ComputeGraph &sub_graph);
static Status BuildInputMapping(GraphItem &graph_item,
std::vector<NodeItem *> &data_nodes,
bool is_root_graph);
@@ -57,14 +57,17 @@ class HybridModelBuilder {
Status ValidateParams();
Status LoadGraph();
Status LoadGeModel(ComputeGraph &graph, const GeModelPtr &ge_model);
Status LoadTask(NodeItem &node_item);
Status LoadTasks();
Status IdentifyVariableOutputs(NodeItem &node_item);
Status IdentifySameInputs(NodeItem &node_item);
Status BuildNodeItem(const NodePtr &node, NodeItem &node_item);
Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item);
Status ParseForceInfershapeNodes(const NodePtr &node, NodeItem &node_item);
Status CollectParallelGroups(NodeItem *node_item);
Status ParseDependentInputNodes(NodeItem &node_item, const std::vector<string> &dependencies);
Status ParseDependentForFusedSubgraph(NodeItem &node_item);
Status ParseDependentForFusedSubgraph(NodeItem &node_item, std::set<ge::NodePtr> &dependencies);
Status ParseDependentByParallelGroup();
Status IndexTaskDefs();
Status IndexTaskDefs(const ComputeGraphPtr &sub_graph, const GeModelPtr &ge_model);
Status IndexSpecialNodes();
@@ -88,6 +91,8 @@ class HybridModelBuilder {
Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
Status OptimizeDependenciesForConstantInputs();
Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx);

const char* GetGraphName() const {
return hybrid_model_.model_name_.c_str();
@@ -99,13 +104,20 @@ class HybridModelBuilder {
GeRootModelPtr ge_root_model_;
std::map<std::string, GeModelPtr> subgraph_models_;
std::map<std::string, NodePtr> constant_op_nodes_;
std::map<std::string, std::set<NodeItem *>> parallel_group_to_nodes_;
std::map<NodeItem *, std::set<std::string>> node_to_parallel_groups_;

HybridModel &hybrid_model_;
std::map<NodePtr, std::vector<std::pair<int, NodePtr>>> node_ref_inputs_;
int node_index = 0;

RuntimeParam &runtime_param_;
VarManager *var_manager_ = nullptr;

// map<known_node_item, map<output_idx, constant_node>>
std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_;

// map<dst_node_item, vector<output_idx, src_node_item>>
std::map<NodeItem *, std::vector<std::pair<uint32_t, NodeItem *>>> host_input_value_dependencies_;
};
} // namespace hybrid
} // namespace ge
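
Note: the two maps above are filled while parsing dependencies and consumed by OptimizeDependenciesForConstantInputs: constant outputs that a node needs as host input values are copied to host once via Convert2HostTensor, after which the shape-inference dependency can be pruned. A hypothetical read side of that cache (UseHostValue is a placeholder stub, not an API from this repository):

    // Hypothetical consumer of the cache filled by Convert2HostTensor.
    static void UseHostValue(int output_idx, const ge::Tensor &tensor) {
      (void) output_idx; (void) tensor;  // placeholder for real shape-inference use
    }

    void ReadCachedHostTensors(const ge::hybrid::HybridModel &model, int64_t src_node_id) {
      const auto &host_tensors = model.GetHostTensors();
      const auto it = host_tensors.find(src_node_id);
      if (it == host_tensors.end()) {
        return;  // this node produced no cached host tensors
      }
      for (const auto &idx_and_tensor : it->second) {
        UseHostValue(idx_and_tensor.first, idx_and_tensor.second);  // (output index, host Tensor)
      }
    }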


+ 48
- 8
ge/hybrid/model/node_item.cc View File

@@ -149,14 +149,16 @@ Status NodeItem::InitInputsAndOutputs() {
if (AttrUtils::GetInt(op_desc, ::ge::ATTR_STAGE_LEVEL, group)) {
GELOGD("[%s] Got stage level from op_desc = %d", op_desc->GetName().c_str(), group);
} else {
if (AttrUtils::GetInt(node->GetOwnerComputeGraph(), ::ge::ATTR_STAGE_LEVEL, group)) {
GELOGD("[%s] Got stage level from parent graph = %d", op_desc->GetName().c_str(), group);
} else {
auto parent_node = node->GetOwnerComputeGraph()->GetParentNode();
if ((parent_node != nullptr) && (AttrUtils::GetInt(parent_node->GetOpDesc(), ::ge::ATTR_STAGE_LEVEL, group))) {
GELOGD("[%s] Got stage level from parent node = %d", op_desc->GetName().c_str(), group);
if (node->GetOwnerComputeGraph() != nullptr) {
if (AttrUtils::GetInt(node->GetOwnerComputeGraph(), ::ge::ATTR_STAGE_LEVEL, group)) {
GELOGD("[%s] Got stage level from parent graph = %d", op_desc->GetName().c_str(), group);
} else {
GELOGD("[%s] Node do not set stage level", op_desc->GetName().c_str());
auto parent_node = node->GetOwnerComputeGraph()->GetParentNode();
if ((parent_node != nullptr) && (AttrUtils::GetInt(parent_node->GetOpDesc(), ::ge::ATTR_STAGE_LEVEL, group))) {
GELOGD("[%s] Got stage level from parent node = %d", op_desc->GetName().c_str(), group);
} else {
GELOGD("[%s] Node do not set stage level", op_desc->GetName().c_str());
}
}
}
}
@@ -249,6 +251,10 @@ bool NodeItem::IsControlOp() const {
return ge::hybrid::IsControlOp(op_desc->GetType());
}

bool NodeItem::IsHcclOp() const {
return NodeExecutorManager::GetInstance().ResolveExecutorType(*node) == NodeExecutorManager::ExecutorType::HCCL;
}

std::string NodeItem::DebugString() const {
std::stringstream ss;
ss << "Node: ";
@@ -291,7 +297,7 @@ void NodeItem::SetToDynamic() {
}
}

GeTensorDescPtr NodeItem::MutableInputDesc(int index) const {
GeTensorDescPtr NodeItem::DoGetInputDesc(int index) const {
if (!has_optional_inputs) {
return op_desc->MutableInputDesc(static_cast<uint32_t>(index));
}
@@ -308,6 +314,40 @@ GeTensorDescPtr NodeItem::MutableInputDesc(int index) const {
return op_desc->MutableInputDesc(input_desc_indices_[index]);
}

GeTensorDescPtr NodeItem::MutableInputDesc(int index) const {
std::lock_guard<std::mutex> lk(mu_);
return DoGetInputDesc(index);
}

Status NodeItem::GetInputDesc(int index, GeTensorDesc &tensor_desc) const {
std::lock_guard<std::mutex> lk(mu_);
auto input_desc = DoGetInputDesc(index);
GE_CHECK_NOTNULL(input_desc);
tensor_desc = *input_desc;
return SUCCESS;
}

Status NodeItem::GetOutputDesc(int index, GeTensorDesc &tensor_desc) const {
std::lock_guard<std::mutex> lk(mu_);
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
GE_CHECK_NOTNULL(output_desc);
tensor_desc = *output_desc;
return SUCCESS;
}

GeTensorDescPtr NodeItem::MutableOutputDesc(int index) const {
std::lock_guard<std::mutex> lk(mu_);
return op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
}

Status NodeItem::UpdateInputDesc(int index, const GeTensorDesc &tensor_desc) {
std::lock_guard<std::mutex> lk(mu_);
auto input_desc = DoGetInputDesc(index);
GE_CHECK_NOTNULL(input_desc);
*input_desc = tensor_desc;
return SUCCESS;
}

Status NodeItem::GetCanonicalInputIndex(uint32_t index, int &canonical_index) const {
if (!has_optional_inputs) {
canonical_index = index;


+ 12
- 3
ge/hybrid/model/node_item.h View File

@@ -17,6 +17,7 @@
#ifndef GE_HYBRID_MODEL_NODE_ITEM_H_
#define GE_HYBRID_MODEL_NODE_ITEM_H_

#include <mutex>
#include <vector>
#include "external/ge/ge_api_error_codes.h"
#include "graph/node.h"
@@ -57,16 +58,22 @@ struct NodeItem {

bool IsInputShapeStatic(int index) const;

GeTensorDescPtr MutableOutputDesc(int index) const {
return op_desc->MutableOutputDesc(static_cast<uint32_t>(index));
}
GeTensorDescPtr MutableOutputDesc(int index) const;
Status UpdateInputDesc(int index, const GeTensorDesc &tensor_desc);

GeTensorDescPtr MutableInputDesc(int index) const;

Status GetInputDesc(int index, GeTensorDesc &tensor_desc) const;

Status GetOutputDesc(int index, GeTensorDesc &tensor_desc) const;

Status GetCanonicalInputIndex(uint32_t index, int &canonical_index) const;

bool IsControlOp() const;

bool IsHcclOp() const;

void SetToDynamic();

std::string DebugString() const;
@@ -111,9 +118,11 @@ struct NodeItem {
Status ResolveDynamicState();
Status ResolveStaticInputsAndOutputs();
void ResolveUnknownShapeType();
GeTensorDescPtr DoGetInputDesc(int index) const;

std::vector<bool> is_input_shape_static_;
std::vector<uint32_t> input_desc_indices_;
mutable std::mutex mu_;
};
} // namespace hybrid
} // namespace ge
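
Note: the accessors declared above are now mutex-guarded so tensor descriptors can be read and updated from concurrent inference threads; DoGetInputDesc is the unlocked private helper, so the locked public accessors never re-acquire the non-recursive std::mutex. A usage sketch under that assumed calling pattern:

    // Usage sketch: a reader and a writer may touch the same NodeItem
    // concurrently; every public accessor serializes on mu_.
    #include <thread>

    void Example(ge::hybrid::NodeItem &item, const ge::GeTensorDesc &new_desc) {
      std::thread writer([&] { (void) item.UpdateInputDesc(0, new_desc); });
      std::thread reader([&] {
        ge::GeTensorDesc snapshot;
        (void) item.GetInputDesc(0, snapshot);  // copies out under the same lock
      });
      writer.join();
      reader.join();
    }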


+ 43
- 16
ge/hybrid/node_executor/aicore/aicore_op_task.cc View File

@@ -71,22 +71,22 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
}

Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
auto op_desc_ptr = std::make_shared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
return INTERNAL_ERROR;
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
if (rt_ret != RT_ERROR_NONE || is_single_op_) {
auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto tbe_kernel = op_desc_ptr->TryGetExtAttr(GetKeyForTbeKernel(), TBEKernelPtr());
if (tbe_kernel == nullptr) {
GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
return INTERNAL_ERROR;
}
TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
GELOGI("TBE: can't find the binfile_key[%s] in HandleMap", stub_name_.c_str());
rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMagic(), json_string),
GELOGI("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
@@ -104,7 +104,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
GELOGI("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle));
std::string meta_data;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForTvmMetaData(), meta_data),
GELOGI("Get original type of json_string"));
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
@@ -114,7 +114,7 @@ Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
kernel_store.ReferTBEHandle(stub_name_.c_str());
}
std::string kernel_name;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name),
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, GetKeyForKernelName(op_desc), kernel_name),
GELOGI("Get original type of kernel_name"));
GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str());
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0));
@@ -307,11 +307,9 @@ Status AiCoreOpTask::UpdateTilingInfo(TaskContext &context) {

auto execution_context = context.GetExecutionContext();

GetContext().SetSessionId(execution_context->context_id);
RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] Start");
GE_CHK_STATUS_RET(CalcTilingInfo(node, tiling_info));
RECORD_EXECUTION_EVENT(execution_context, context.GetNodeName(), "[CalcTilingInfo] End");
GetContext().SetSessionId(execution_context->session_id);

// update op args by tiling info
block_dim_ = static_cast<uint32_t>(tiling_info.block_dim);
@@ -351,9 +349,6 @@ Status AiCoreOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info)
GE_CHK_STATUS_RET(OpParaCalculate(*node, tiling_info),
"Failed calc tiling data of node %s.",
node->GetName().c_str());
if (is_single_op_) {
tiling_info.clear_atomic = false;
}
GELOGD("[%s] Done invoking OpParaCalculate successfully.", node->GetName().c_str());
return SUCCESS;
}
@@ -470,6 +465,22 @@ std::string AiCoreOpTask::GetKeyForOpParamSize() const {
return kAttrOpParamSize;
}

std::string AiCoreOpTask::GetKeyForTbeKernel() const {
return OP_EXTATTR_NAME_TBE_KERNEL;
}

std::string AiCoreOpTask::GetKeyForTvmMagic() const {
return TVM_ATTR_NAME_MAGIC;
}

std::string AiCoreOpTask::GetKeyForTvmMetaData() const {
return TVM_ATTR_NAME_METADATA;
}

std::string AiCoreOpTask::GetKeyForKernelName(const OpDesc &op_desc) const {
return op_desc.GetName() + "_kernelname";
}

Status AtomicAddrCleanOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
GE_CHK_STATUS_RET_NOLOG(AiCoreOpTask::Init(op_desc, task_def));
return InitAtomicAddrCleanIndices(op_desc);
@@ -526,6 +537,22 @@ std::string AtomicAddrCleanOpTask::GetKeyForOpParamSize() const {
return kAttrAtomicOpParamSize;
}

std::string AtomicAddrCleanOpTask::GetKeyForTbeKernel() const {
return EXT_ATTR_ATOMIC_TBE_KERNEL;
}

std::string AtomicAddrCleanOpTask::GetKeyForTvmMagic() const {
return ATOMIC_ATTR_TVM_MAGIC;
}

std::string AtomicAddrCleanOpTask::GetKeyForTvmMetaData() const {
return ATOMIC_ATTR_TVM_METADATA;
}

std::string AtomicAddrCleanOpTask::GetKeyForKernelName(const OpDesc &op_desc) const {
return op_desc.GetName() + "_atomic_kernelname";
}

Status AtomicAddrCleanOpTask::CalcTilingInfo(const NodePtr &node, OpRunInfo &tiling_info) {
GELOGD("[%s] Start to invoke OpAtomicCalculate.", node->GetName().c_str());
GE_CHK_STATUS_RET(OpAtomicCalculate(*node, tiling_info),


+ 8
- 0
ge/hybrid/node_executor/aicore/aicore_op_task.h View File

@@ -81,6 +81,10 @@ class AiCoreOpTask {
protected:
Status UpdateTilingInfo(TaskContext &context);
virtual std::string GetKeyForOpParamSize() const;
virtual std::string GetKeyForTbeKernel() const;
virtual std::string GetKeyForTvmMagic() const;
virtual std::string GetKeyForTvmMetaData() const;
virtual std::string GetKeyForKernelName(const OpDesc &op_desc) const;
virtual Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info);

std::unique_ptr<TensorBuffer> tiling_buffer_ = nullptr;
@@ -119,6 +123,10 @@ class AtomicAddrCleanOpTask : public AiCoreOpTask {

protected:
std::string GetKeyForOpParamSize() const override;
std::string GetKeyForTbeKernel() const override;
std::string GetKeyForTvmMagic() const override;
std::string GetKeyForTvmMetaData() const override;
std::string GetKeyForKernelName(const OpDesc &op_desc) const override;
Status CalcTilingInfo(const NodePtr &node, optiling::OpRunInfo &tiling_info) override;

private:


+ 1
- 0
ge/hybrid/node_executor/aicore/aicore_task_builder.cc View File

@@ -70,6 +70,7 @@ Status AiCoreTaskBuilder::BuildTask(std::unique_ptr<AiCoreNodeTask> &node_task,
auto atomic_task =
std::unique_ptr<AtomicAddrCleanOpTask>(new(std::nothrow)AtomicAddrCleanOpTask());
GE_CHECK_NOTNULL(atomic_task);
atomic_task->SetSingleOp(is_single_op);
GE_CHK_STATUS_RET(atomic_task->Init(*op_desc_, task_defs_.front()),
"[%s] Failed to init task for AtomicAddrClean",
op_desc_->GetName().c_str());


+ 44
- 38
ge/hybrid/node_executor/compiledsubgraph/known_node_executor.cc View File

@@ -18,6 +18,7 @@
#include "cce/aicpu_engine_struct.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/fmk_error_codes.h"
#include "common/dump/dump_manager.h"
#include "common/ge/ge_util.h"
#include "graph/attr_value.h"
#include "graph/debug/ge_attr_define.h"
@@ -95,13 +96,6 @@ Status KnownNodeTask::UpdateArgs(TaskContext &context) {
Status KnownNodeTask::Init(TaskContext &context) {
// allocate output mem
GE_CHK_STATUS_RET(context.AllocateOutputs(), "known node task allocate output failed.");

// init davinci model
if (!load_flag_) {
davinci_model_->InitRuntimeParams();
GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed.");
}

// allocate mem base
void *buffer = nullptr;
if (davinci_model_->TotalMemSize() != 0) {
@@ -112,44 +106,49 @@ Status KnownNodeTask::Init(TaskContext &context) {
"known node task allocate workspace failed.");
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(),
"[KnownNodeTask_AllocateWorkspace] End, size %zu", davinci_model_->TotalMemSize());
bool addr_not_changed = false;
if (davinci_model_->GetRuntimeParam().mem_base == buffer) {
addr_not_changed = true;
}
davinci_model_->SetKnownNodeAddrNotChanged(addr_not_changed);
// update mem base
davinci_model_->UpdateMemBase(static_cast<uint8_t *>(buffer));
GELOGI("KnownNodeTask::Init mem base is %p, size %lu.",
davinci_model_->GetRuntimeParam().mem_base, davinci_model_->GetRuntimeParam().mem_size);
}
if (!load_flag_) {
auto dump_properties = context.GetDumpProperties();
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = nullptr;
TensorValue *varible_global_step = context.GetVariable(NODE_NAME_GLOBAL_STEP);
if (varible_global_step != nullptr) {
global_step = varible_global_step->MutableData();
}
davinci_model_->SetKnownShapeGlobalStep(global_step);
}
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model_->SetDeviceId(device_id);
GE_CHK_STATUS_RET(davinci_model_->Init(), "KnownNodeExecutor::InitDavinciModel failed.");
load_flag_ = true;
} else {
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
davinci_model_->Id(), davinci_model_->SubModelId()), "KnownNodeTask::Init destroy aicpu kernel failed.");
}
GE_CHK_STATUS_RET(ModelManager::GetInstance()->DestroyAicpuKernel(davinci_model_->GetSessionId(),
davinci_model_->Id(), davinci_model_->SubModelId()),
"KnownNodeTask::Init destroy aicpu kernel failed.");
GELOGI("[%s] KnownNodeExecutor::Init success.", context.GetNodeName());
return SUCCESS;
}

Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) {
GELOGD("[Init][DavinciModel] start");
davinci_model_->InitRuntimeParams();
GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id));
davinci_model_->SetDeviceId(static_cast<uint32_t>(device_id));

auto dump_properties = DumpManager::GetInstance().GetDumpProperties(model.GetSessionId());
if (dump_properties.IsDumpOpen() || dump_properties.IsOpDebugOpen()) {
davinci_model_->SetDumpProperties(dump_properties);
void *global_step = model.GetGlobalStep();
davinci_model_->SetKnownShapeGlobalStep(global_step);
}

void *weight = nullptr;
size_t weight_size = 0;
if (weight_buffer != nullptr) {
weight = weight_buffer->GetData();
weight_size = weight_buffer->GetSize();
}
GELOGD("Start to init davinci model, weight size = %zu", weight_size);
GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
GELOGD("[Init][Model] success");
return SUCCESS;
}

Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) {
return davinci_model_->Init(nullptr, 0, weight, weight_size);
}

Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
GELOGD("[%s] KnownNodeExecutor::PrepareTask in.", context.GetNodeName());
RECORD_EXECUTION_EVENT(context.GetExecutionContext(), context.GetNodeName(), "[KnownNodeExecutorPrepareTask] Start");
@@ -173,12 +172,17 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
const GeModelPtr ge_model = model.GetGeModel(node);
GE_CHECK_NOTNULL(ge_model);

AscendString graph_name;
GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name");
auto weight_buffer = model.GetModelWeight(graph_name.GetString());

std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
GE_CHECK_NOTNULL(davinci_model);

// set known node flag as true
davinci_model->SetKnownNode(true);
davinci_model->SetId(model.GetModelId());
davinci_model->SetDumpModelName(model.GetModelName());
davinci_model->SetOmName(model.GetOmName());
// set model id as root node's node id
davinci_model->SetSubModelId(node->GetOpDesc()->GetId());
@@ -186,9 +190,11 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node

GE_CHK_STATUS_RET(davinci_model->Assign(ge_model), "KnownNodeExecutor::LoadTask davincimodel assign failed.");

task = MakeShared<KnownNodeTask>(davinci_model);
GE_CHECK_NOTNULL(task);
auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
GE_CHECK_NOTNULL(known_node_task);
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer));
GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
task = std::move(known_node_task);
return SUCCESS;
}
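
Note: with InitDavinciModel, the DavinciModel is now fully initialized once at load time, against the hybrid model's shared per-subgraph weight buffer rather than a private copy. Condensed from LoadTask above (error handling elided):

    // Condensed load-time flow: assign the GeModel, then initialize once with
    // the shared weight buffer looked up by subgraph name.
    auto davinci_model = MakeShared<DavinciModel>(0, nullptr);
    davinci_model->SetKnownNode(true);
    davinci_model->Assign(ge_model);

    auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
    known_node_task->InitDavinciModel(model, model.GetModelWeight(graph_name.GetString()));
    task = std::move(known_node_task);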



Some files were not shown because too many files changed in this diff
