Browse Source

Merge remote-tracking branch 'upstream/development' into review_control_passes

pull/401/head
chenyemeng 4 years ago
parent
commit
b11b78f78e
100 changed files with 962 additions and 1192 deletions
  1. +13
    -6
      CMakeLists.txt
  2. +1
    -1
      cmake/external_libs/protobuf_static.cmake
  3. +6
    -5
      ge/CMakeLists.txt
  4. +1
    -1
      ge/client/ge_api.cc
  5. +2
    -2
      ge/common/CMakeLists.txt
  6. +5
    -7
      ge/common/auth/file_saver.cc
  7. +2
    -2
      ge/common/cust_aicpu_kernel_store.cc
  8. +7
    -10
      ge/common/debug/memory_dumper.cc
  9. +8
    -8
      ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc
  10. +17
    -10
      ge/common/ge/op_tiling_manager.cc
  11. +39
    -28
      ge/common/ge/plugin_manager.cc
  12. +6
    -6
      ge/common/ge/plugin_manager.h
  13. +43
    -39
      ge/common/ge/tbe_plugin_manager.cc
  14. +0
    -1
      ge/common/ge/tbe_plugin_manager.h
  15. +4
    -2
      ge/common/ge_common.mk
  16. +2
    -4
      ge/common/helper/model_cache_helper.cc
  17. +14
    -17
      ge/common/helper/model_helper.cc
  18. +6
    -6
      ge/common/helper/om_file_helper.cc
  19. +2
    -2
      ge/common/kernel_store.cc
  20. +1
    -2
      ge/common/model_parser/base.cc
  21. +6
    -8
      ge/common/model_saver.cc
  22. +16
    -12
      ge/common/profiling/profiling_manager.cc
  23. +5
    -4
      ge/common/properties_manager.h
  24. +56
    -29
      ge/common/util.cc
  25. +1
    -1
      ge/engine_manager/dnnengine_manager.cc
  26. +1
    -15
      ge/executor/ge_executor.cc
  27. +2
    -2
      ge/ge_inference.mk
  28. +1
    -1
      ge/ge_local_engine/engine/host_cpu_engine.cc
  29. +19
    -0
      ge/ge_local_engine/module.mk
  30. +2
    -2
      ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc
  31. +1
    -1
      ge/ge_local_engine/ops_kernel_store/op/no_op.cc
  32. +1
    -1
      ge/ge_runner.mk
  33. +167
    -57
      ge/ge_runtime/task/hccl_task.cc
  34. +24
    -6
      ge/ge_runtime/task/hccl_task.h
  35. +1
    -9
      ge/generator/ge_generator.cc
  36. +2
    -6
      ge/graph/build/graph_builder.cc
  37. +1
    -1
      ge/graph/build/label_allocator.cc
  38. +4
    -4
      ge/graph/build/logical_stream_allocator.cc
  39. +2
    -2
      ge/graph/build/memory/binary_block_mem_assigner.cc
  40. +9
    -10
      ge/graph/build/memory/block_mem_assigner.cc
  41. +5
    -8
      ge/graph/build/memory/graph_mem_assigner.cc
  42. +2
    -2
      ge/graph/build/memory/hybrid_mem_assigner.cc
  43. +1
    -8
      ge/graph/build/model_builder.cc
  44. +6
    -6
      ge/graph/build/run_context.cc
  45. +4
    -9
      ge/graph/build/stream_allocator.cc
  46. +4
    -6
      ge/graph/build/stream_graph_optimizer.cc
  47. +5
    -6
      ge/graph/build/task_generator.cc
  48. +1
    -1
      ge/graph/execute/graph_execute.cc
  49. +1
    -1
      ge/graph/load/graph_loader.cc
  50. +1
    -1
      ge/graph/load/new_model_manager/cpu_queue_schedule.cc
  51. +3
    -15
      ge/graph/load/new_model_manager/data_dumper.cc
  52. +79
    -87
      ge/graph/load/new_model_manager/davinci_model.cc
  53. +19
    -14
      ge/graph/load/new_model_manager/model_manager.cc
  54. +0
    -2
      ge/graph/load/new_model_manager/model_manager.h
  55. +4
    -6
      ge/graph/load/new_model_manager/model_utils.cc
  56. +2
    -2
      ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
  57. +61
    -22
      ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
  58. +1
    -1
      ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
  59. +8
    -6
      ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
  60. +4
    -2
      ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h
  61. +13
    -31
      ge/graph/load/new_model_manager/zero_copy_offset.cc
  62. +1
    -2
      ge/graph/load/new_model_manager/zero_copy_offset.h
  63. +4
    -3
      ge/graph/load/new_model_manager/zero_copy_task.cc
  64. +23
    -36
      ge/graph/manager/graph_manager.cc
  65. +7
    -7
      ge/graph/manager/graph_var_manager.cc
  66. +4
    -4
      ge/graph/manager/trans_var_data_utils.cc
  67. +3
    -3
      ge/graph/manager/util/debug.cc
  68. +0
    -2
      ge/graph/manager/util/debug.h
  69. +3
    -4
      ge/graph/optimize/graph_optimize.cc
  70. +0
    -397
      ge/graph/optimize/optimizer/allreduce_fusion_pass.cc
  71. +0
    -56
      ge/graph/optimize/optimizer/allreduce_fusion_pass.h
  72. +2
    -2
      ge/graph/partition/engine_place.cc
  73. +6
    -6
      ge/graph/partition/graph_partition.cc
  74. +1
    -1
      ge/graph/passes/atomic_addr_clean_pass.cc
  75. +1
    -1
      ge/graph/passes/cond_remove_pass.cc
  76. +11
    -0
      ge/graph/passes/mark_agnostic_pass.cc
  77. +1
    -1
      ge/graph/passes/memcpy_addr_async_pass.cc
  78. +2
    -2
      ge/graph/passes/multi_batch_pass.cc
  79. +2
    -2
      ge/graph/passes/set_input_output_offset_pass.cc
  80. +2
    -0
      ge/graph/preprocess/graph_preprocess.cc
  81. +4
    -3
      ge/graph/preprocess/multi_batch_copy_graph.cc
  82. +6
    -0
      ge/graph/preprocess/multi_batch_options.cc
  83. +20
    -0
      ge/host_cpu_engine/module.mk
  84. +2
    -2
      ge/hybrid/executor/hybrid_execution_context.h
  85. +36
    -0
      ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc
  86. +8
    -0
      ge/hybrid/node_executor/aicpu/aicpu_ext_info.h
  87. +26
    -17
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
  88. +1
    -1
      ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
  89. +5
    -5
      ge/init/gelib.cc
  90. +1
    -1
      ge/ir_build/atc_ir_common.cc
  91. +6
    -6
      ge/ir_build/ge_ir_build.cc
  92. +1
    -1
      ge/offline/CMakeLists.txt
  93. +9
    -7
      ge/omm/csa_interact.cc
  94. +2
    -2
      ge/opskernel_manager/ops_kernel_manager.cc
  95. +14
    -14
      ge/session/session_manager.cc
  96. +3
    -15
      ge/single_op/single_op.cc
  97. +0
    -4
      ge/single_op/single_op.h
  98. +17
    -15
      ge/single_op/single_op_model.cc
  99. +2
    -2
      ge/single_op/single_op_model.h
  100. +2
    -2
      ge/single_op/task/aicpu_kernel_task_builder.cc

+ 13
- 6
CMakeLists.txt View File

@@ -56,7 +56,7 @@ if (ENABLE_OPEN_SRC)
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH})
set(STATIC_ACL_LIB ${GE_LIB_PATH})
find_module(slog libslog.so ${GE_LIB_PATH})
find_module(mmpa libmmpa.so ${GE_LIB_PATH})
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH})
find_module(msprof libmsprof.so ${GE_LIB_PATH})
find_module(hccl libhccl.so ${GE_LIB_PATH})
find_module(adump_server libadump_server.a ${GE_LIB_PATH})
@@ -67,10 +67,10 @@ if (ENABLE_OPEN_SRC)
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH})
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH})
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH})
else()
find_module(slog libslog.so ${ASCEND_ATC_DIR})
find_module(mmpa libmmpa.so ${ASCEND_ATC_DIR})
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR})
if(PLATFORM STREQUAL "train")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR})
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR})
@@ -91,7 +91,7 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR})
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
if(PRODUCT STREQUAL "flr3")
find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR})
elseif(PRODUCT STREQUAL "flr1")
@@ -114,7 +114,7 @@ if (ENABLE_OPEN_SRC)
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR})
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR})
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver)
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR})
else()
message(FATAL_ERROR "PLATFORM param is invalid, should be train or inference, build terminated")
endif()
@@ -148,16 +148,23 @@ elseif (ENABLE_D OR ENABLE_ACL)
# common libraries
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH})
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})

if (ENABLE_D)
# training
find_module(mmpa libmmpa.so ${ASCEND_MS_DRIVER_PATH})
find_module(runtime libruntime.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
find_module(register libregister.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH})
endif ()

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
add_subdirectory(metadef)
elseif(ENABLE_MS_TESTCASE)
include(cmake/external_libs/protobuf_static.cmake)
include(cmake/external_libs/securec.cmake)
include(cmake/intf_pub_linux.cmake)

set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef)
add_subdirectory(metadef)
else()
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/../metadef)
set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/../parser)


+ 1
- 1
cmake/external_libs/protobuf_static.cmake View File

@@ -48,7 +48,7 @@ set_target_properties(ascend_protobuf_static_lib PROPERTIES
add_library(ascend_protobuf_static INTERFACE)
target_include_directories(ascend_protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include)
target_link_libraries(ascend_protobuf_static INTERFACE ascend_protobuf_static_lib)
if (ENABLE_D OR ENABLE_ACL)
if (ENABLE_D OR ENABLE_ACL OR ENABLE_MS_TESTCASES)
include_directories(${PROTOBUF_STATIC_PKG_DIR}/include)
endif ()



+ 6
- 5
ge/CMakeLists.txt View File

@@ -1,4 +1,4 @@
if (NOT ENABLE_D AND NOT ENABLE_ACL)
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
add_subdirectory(common)
add_subdirectory(plugin/engine)
add_subdirectory(graph/build/memory)
@@ -600,7 +600,7 @@ set(INFER_SRC_LIST
"analyzer/analyzer.cc"
)

if (NOT ENABLE_D AND NOT ENABLE_ACL)
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})

@@ -648,6 +648,7 @@ target_link_libraries(ge_runner
ge_memory
adump_server
msprofiler
static_mmpa
-Wl,--no-as-needed
graph
ge_common
@@ -655,7 +656,6 @@ target_link_libraries(ge_runner
register
c_sec
slog
mmpa
msprof
runtime
resource
@@ -712,6 +712,7 @@ target_include_directories(ge_compiler PRIVATE
target_link_libraries(ge_compiler
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
-Wl,--no-as-needed
graph
ge_common
@@ -720,7 +721,6 @@ target_link_libraries(ge_compiler
c_sec
error_manager
slog
mmpa
runtime_compile
resource
-Wl,--as-needed
@@ -770,6 +770,7 @@ target_link_libraries(opensrc_ascendcl PRIVATE
ge_executor
ge_common_static
graph_static
static_mmpa
ascend_protobuf_static
register_static
error_manager_static
@@ -779,11 +780,11 @@ target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--no-as-needed
c_sec
runtime
mmpa
slog
msprof
ascend_hal_stub
-Wl,--as-needed
-lrt
-ldl
json
)


+ 1
- 1
ge/client/ge_api.cc View File

@@ -177,7 +177,7 @@ Session::Session(const std::map<string, string> &options) {
// check init status
sessionId_ = 0;
if (!g_ge_initialized) {
GELOGE(GE_CLI_GE_NOT_INITIALIZED);
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized.");
return;
}
// call Initialize


+ 2
- 2
ge/common/CMakeLists.txt View File

@@ -105,6 +105,7 @@ target_include_directories(ge_common PRIVATE

target_link_libraries(ge_common PRIVATE
$<BUILD_INTERFACE:intf_pub>
static_mmpa
-Wl,--no-as-needed
graph
ascend_protobuf
@@ -112,7 +113,6 @@ target_link_libraries(ge_common PRIVATE
c_sec
error_manager
slog
mmpa
-Wl,--as-needed
json
-lrt
@@ -210,7 +210,7 @@ target_link_libraries(ge_common PRIVATE
c_sec
error_manager
slog
mmpa
static_mmpa
-Wl,--as-needed
json
-lrt


+ 5
- 7
ge/common/auth/file_saver.cc View File

@@ -16,9 +16,7 @@

#include "common/auth/file_saver.h"

#include <fcntl.h>
#include <securec.h>
#include <unistd.h>
#include <cstdlib>
#include <fstream>
#include <vector>
@@ -39,12 +37,12 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) {
return FAILED;
}

char real_path[PATH_MAX] = {0};
GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr,
char real_path[MMPA_MAX_PATH] = {0};
GE_IF_BOOL_EXEC(mmRealPath(file_path.c_str(), real_path, MMPA_MAX_PATH) != EN_OK,
GELOGI("File %s is not exist, it will be created.", file_path.c_str()));
// Open file
mode_t mode = S_IRUSR | S_IWUSR;
fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);
mmMode_t mode = M_IRUSR | M_IWUSR;
fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode);
if (fd == EN_INVALID_PARAM || fd == EN_ERROR) {
// -1: Failed to open file; - 2: Illegal parameter
GELOGE(FAILED, "Open file failed. mmpa_errno = %d, %s", fd, strerror(errno));
@@ -194,7 +192,7 @@ Status FileSaver::SaveToBuffWithFileHeader(const ModelFileHeader &file_header,

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::CheckPath(const std::string &file_path) {
// Determine file path length
if (file_path.size() >= PATH_MAX) {
if (file_path.size() >= MMPA_MAX_PATH) {
GELOGE(FAILED, "Path is too long:%zu", file_path.size());
return FAILED;
}


+ 2
- 2
ge/common/cust_aicpu_kernel_store.cc View File

@@ -25,7 +25,7 @@ void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel)
}

void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> &op_desc) const {
GELOGI("LoadCustAICPUKernelBinToOpDesc in");
GELOGD("LoadCustAICPUKernelBinToOpDesc in");
if (op_desc != nullptr) {
auto kernel_bin = FindKernel(op_desc->GetName());
if (kernel_bin != nullptr) {
@@ -34,6 +34,6 @@ void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<
GELOGI("Load cust aicpu kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize());
}
}
GELOGI("LoadCustAICPUKernelBinToOpDesc success");
GELOGD("LoadCustAICPUKernelBinToOpDesc success");
}
} // namespace ge

+ 7
- 10
ge/common/debug/memory_dumper.cc View File

@@ -16,9 +16,6 @@

#include "common/debug/memory_dumper.h"

#include <fcntl.h>

#include <unistd.h>
#include <string>

#include "framework/common/debug/log.h"
@@ -138,26 +135,26 @@ int MemoryDumper::OpenFile(const char *filename) {
}
// Get the absolute path
string real_path;
char tmp_path[PATH_MAX] = {0};
char tmp_path[MMPA_MAX_PATH] = {0};
GE_IF_BOOL_EXEC(
-1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos);
string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= PATH_MAX, return kInvalidFd, "Prefix path is too long!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(realpath(prefix_path.c_str(), tmp_path) == nullptr, return kInvalidFd,
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd,
"Dir %s does not exit.", prefix_path.c_str());
real_path = std::string(tmp_path) + last_path;)
GE_IF_BOOL_EXEC(
path_split_pos == -1 || path_split_pos == 0,
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= PATH_MAX, return kInvalidFd, "Prefix path is too long!");
GE_IF_BOOL_EXEC(realpath(filename, tmp_path) == nullptr,
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!");
GE_IF_BOOL_EXEC(mmRealPath(filename, tmp_path, MMPA_MAX_PATH) != EN_OK,
GELOGI("File %s does not exit, it will be created.", filename));
real_path = std::string(tmp_path);)

// Open file, only the current user can read and write, to avoid malicious application access
// Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability.
mode_t mode = S_IRUSR | S_IWUSR;
mmMode_t mode = M_IRUSR | M_IWUSR;

int32_t fd = mmOpen2(real_path.c_str(), O_RDWR | O_CREAT | O_APPEND, mode);
int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | O_TRUNC, mode);
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
GELOGE(kInvalidFd, "open file failed. errno = %d, %s", fd, strerror(errno));
return kInvalidFd;


+ 8
- 8
ge/common/formats/format_transfers/format_transfer_nchw_fz_c04.cc View File

@@ -118,19 +118,19 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) {

// data overflow check totally
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o);
return INTERNAL_ERROR);
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o);
return INTERNAL_ERROR);
auto t1 = h_o * w_o;
auto t2 = n_o * c_o;
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2);
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
return INTERNAL_ERROR);

int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size);
return INTERNAL_ERROR);
int64_t dst_size = total_ele_cnt * size;
if (dst_size == 0) {
@@ -205,20 +205,20 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin

// data overflow check
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o);
return INTERNAL_ERROR);
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o);
return INTERNAL_ERROR);
auto t1 = h_o * w_o;
auto t2 = n_o * c_o;
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2);
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2);
return INTERNAL_ERROR);

int64_t total_ele_cnt = n_o * c_o * h_o * w_o;
int size = GetSizeByDataType(args.src_data_type);
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size),
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size);
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size);
return INTERNAL_ERROR);

int64_t dst_size = total_ele_cnt * size;


+ 17
- 10
ge/common/ge/op_tiling_manager.cc View File

@@ -30,8 +30,10 @@ const uint8_t kPrefixIndex = 9;
namespace ge {
void OpTilingManager::ClearHandles() noexcept {
for (const auto &handle : handles_) {
if (dlclose(handle.second) != 0) {
GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), dlerror());
if (mmDlclose(handle.second) != 0) {
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), error);
}
}
handles_.clear();
@@ -40,11 +42,12 @@ void OpTilingManager::ClearHandles() noexcept {
OpTilingManager::~OpTilingManager() { ClearHandles(); }

std::string OpTilingManager::GetPath() {
const char *opp_path_env = std::getenv(kEnvName);
char opp_path_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(kEnvName, opp_path_env, MMPA_MAX_PATH);
std::string opp_path = kDefaultPath;
if (opp_path_env != nullptr) {
char resolved_path[PATH_MAX];
if (realpath(opp_path_env, resolved_path) == NULL) {
if (res == EN_OK) {
char resolved_path[MMPA_MAX_PATH];
if (mmRealPath(opp_path_env, resolved_path, MMPA_MAX_PATH) != EN_OK) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"});
GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env);
@@ -66,16 +69,20 @@ void OpTilingManager::LoadSo() {
std::string built_in_name = kDefaultBuiltInTilingPath.substr(kPrefixIndex);
std::string custom_name = kDefaultCustomTilingPath.substr(kPrefixIndex);

void *handle_bi = dlopen(built_in_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL);
void *handle_bi = mmDlopen(built_in_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle_bi == nullptr) {
GELOGW("Failed to dlopen %s!", dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to dlopen %s!", error);
} else {
handles_[built_in_name] = handle_bi;
}

void *handle_ct = dlopen(custom_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL);
void *handle_ct = mmDlopen(custom_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle_ct == nullptr) {
GELOGW("Failed to dlopen %s!", dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to dlopen %s!", error);
} else {
handles_[custom_name] = handle_ct;
}


+ 39
- 28
ge/common/ge/plugin_manager.cc View File

@@ -16,9 +16,7 @@

#include "common/ge/plugin_manager.h"

#include <dirent.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <fstream>
@@ -38,8 +36,10 @@ const char *const kExt = ".so"; // supported extension of shared obje
namespace ge {
void PluginManager::ClearHandles_() noexcept {
for (const auto &handle : handles_) {
if (dlclose(handle.second) != 0) {
GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), dlerror());
if (mmDlclose(handle.second) != 0) {
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), error);
}
}
handles_.clear();
@@ -48,18 +48,18 @@ void PluginManager::ClearHandles_() noexcept {
PluginManager::~PluginManager() { ClearHandles_(); }

string PluginManager::GetPath() {
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) {
mmDlInfo dl_info;
if (mmDladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) != EN_OK) {
GELOGW("Failed to read the shared library file path!");
return string();
} else {
std::string so_path = dl_info.dli_fname;
char path[PATH_MAX] = {0};
if (so_path.length() >= PATH_MAX) {
char path[MMPA_MAX_PATH] = {0};
if (so_path.length() >= MMPA_MAX_PATH) {
GELOGW("The shared library file path is too long!");
return string();
}
if (realpath(so_path.c_str(), path) == nullptr) {
if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) {
GELOGW("Failed to get realpath of %s", so_path.c_str());
return string();
}
@@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
std::vector<std::string> path_vec;
SplitPath(path, path_vec);
for (const auto &single_path : path_vec) {
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID,
GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID,
"The shared library file path is too long!");
continue);
// load break when number of loaded so reach maximum
@@ -119,16 +119,18 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str());

// load continue when dlopen is failed
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL);
auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle == nullptr) {
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error);
continue;
}

// load continue when so is invalid
bool is_valid = true;
for (const auto &func_name : func_check_list) {
auto real_fn = (void (*)())dlsym(handle, func_name.c_str());
auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(),
func_name.c_str());
@@ -137,7 +139,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose.");
continue;
}

@@ -197,22 +199,29 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
so_list_.clear();
ClearHandles_();

char canonical_path[PATH_MAX] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= PATH_MAX, GELOGW("File path is too long!");
char canonical_path[MMPA_MAX_PATH] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= MMPA_MAX_PATH, GELOGW("File path is too long!");
return FAILED, "File path is too long!");
if (realpath(path.c_str(), canonical_path) == nullptr) {
if (mmRealPath(path.c_str(), canonical_path, MMPA_MAX_PATH) != EN_OK) {
GELOGW("Failed to get realpath of %s", path.c_str());
return SUCCESS;
}

DIR *dir = opendir(canonical_path);
if (dir == nullptr) {
GELOGW("Invalid path for load: %s", path.c_str());
return SUCCESS;
INT32 is_dir = mmIsDir(canonical_path);
// Lib plugin path not exist
if (is_dir != EN_OK) {
GELOGW("Invalid path for load: %s", path.c_str());
return SUCCESS;
}

struct dirent *entry = nullptr;
while ((entry = readdir(dir)) != nullptr) {
mmDirent **entries = nullptr;
auto ret = mmScandir(canonical_path, &entries, nullptr, nullptr);
if (ret < EN_OK) {
GELOGW("scan dir failed. path = %s, ret = %d", canonical_path, ret);
return FAILED;
}
for (int i = 0; i < ret; ++i) {
mmDirent *entry = entries[i];
// read fileName and fileType
std::string file_name = entry->d_name;
unsigned char file_type = entry->d_type;
@@ -250,9 +259,11 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
GELOGI("Dlopen so path name: %s. ", file_path_dlopen.c_str());

// load continue when dlopen is failed
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL);
auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle == nullptr) {
GELOGW("Failed in dlopen %s!", dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed in dlopen %s!", error);
continue;
}

@@ -261,7 +272,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
// load continue when so is invalid
bool is_valid = true;
for (const auto &func_name : func_check_list) {
auto real_fn = (void (*)())dlsym(handle, func_name.c_str());
auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("The %s is skipped since function %s is not existed!", file_name.c_str(), func_name.c_str());
is_valid = false;
@@ -269,7 +280,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
}
}
if (!is_valid) {
GE_LOGE_IF(dlclose(handle), "Failed to dlclose.");
GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose.");
continue;
}

@@ -279,7 +290,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_
handles_[string(file_name)] = handle;
num_of_loaded_so++;
}
closedir(dir);
mmScandirFree(entries, ret);
if (num_of_loaded_so == 0) {
GELOGW("No loadable shared library found in the path: %s", path.c_str());
return SUCCESS;


+ 6
- 6
ge/common/ge/plugin_manager.h View File

@@ -17,7 +17,6 @@
#ifndef GE_COMMON_GE_PLUGIN_MANAGER_H_
#define GE_COMMON_GE_PLUGIN_MANAGER_H_

#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>
@@ -30,6 +29,7 @@
#include "common/ge_inner_error_codes.h"
#include "engine/dnnengine.h"
#include "framework/common/debug/ge_log.h"
#include "mmpa/mmpa_api.h"

namespace ge {
using SoToHandleMap = std::map<std::string, void *>;
@@ -57,7 +57,7 @@ class PluginManager {
template <typename R, typename... Types>
Status GetAllFunctions(const string &func_name, map<string, function<R(Types... args)>> &funcs) {
for (const auto &handle : handles_) {
auto real_fn = (R(*)(Types...))dlsym(handle.second, func_name.c_str());
auto real_fn = (R(*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("Failed to get function %s in %s!", func_name.c_str(), handle.first.c_str());
return GE_PLGMGR_FUNC_NOT_EXIST;
@@ -72,7 +72,7 @@ class PluginManager {
Status InvokeAll(const string &func_name, Types... args) {
for (const auto &handle : handles_) {
// If the funcName is existed, signature of realFn can be casted to any type
auto real_fn = (void (*)(Types...))dlsym(handle.second, func_name.c_str());
auto real_fn = (void (*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str());
return GE_PLGMGR_INVOKE_FAILED;
@@ -87,7 +87,7 @@ class PluginManager {
Status InvokeAll(const string &func_name, T arg) {
for (const auto &handle : handles_) {
// If the funcName is existed, signature of realFn can be casted to any type
auto real_fn = (void (*)(T))dlsym(handle.second, func_name.c_str());
auto real_fn = (void (*)(T))mmDlsym(handle.second, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str());
return GE_PLGMGR_INVOKE_FAILED;
@@ -112,7 +112,7 @@ class PluginManager {
Status InvokeAll(const string &func_name, T1 arg) {
for (const auto &handle : handles_) {
// If the funcName is existed, signature of realFn can be casted to any type
auto real_fn = (T2(*)(T1))dlsym(handle.second, func_name.c_str());
auto real_fn = (T2(*)(T1))mmDlsym(handle.second, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str());
return GE_PLGMGR_INVOKE_FAILED;
@@ -130,7 +130,7 @@ class PluginManager {
Status InvokeAll(const string &func_name) {
for (const auto &handle : handles_) {
// If the funcName is existed, signature of realFn can be casted to any type
auto real_fn = (T(*)())dlsym(handle.second, func_name.c_str());
auto real_fn = (T(*)())mmDlsym(handle.second, const_cast<char *>(func_name.c_str()));
if (real_fn == nullptr) {
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str());
return GE_PLGMGR_INVOKE_FAILED;


+ 43
- 39
ge/common/ge/tbe_plugin_manager.cc View File

@@ -16,8 +16,6 @@

#include "common/ge/tbe_plugin_manager.h"

#include <dirent.h>
#include <unistd.h>
#include <algorithm>
#include <cstring>
#include <fstream>
@@ -50,9 +48,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginMana
Status TBEPluginManager::ClearHandles_() {
Status ret = SUCCESS;
for (const auto &handle : handles_vec_) {
if (dlclose(handle) != 0) {
if (mmDlclose(handle) != 0) {
ret = FAILED;
GELOGW("Failed to close handle: %s", dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle: %s", error);
}
}
handles_vec_.clear();
@@ -65,18 +65,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finali
}

string TBEPluginManager::GetPath() {
Dl_info dl_info;
if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) {
mmDlInfo dl_info;
if (mmDladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) != EN_OK) {
GELOGW("Failed to read so path!");
return string();
} else {
string so_path = dl_info.dli_fname;
char path[PATH_MAX] = {0};
if (so_path.length() >= PATH_MAX) {
char path[MMPA_MAX_PATH] = {0};
if (so_path.length() >= MMPA_MAX_PATH) {
GELOGW("File path is too long!");
return string();
}
if (realpath(so_path.c_str(), path) == nullptr) {
if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) {
GELOGW("Failed to get realpath of %s", so_path.c_str());
return string();
}
@@ -108,35 +108,36 @@ void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_lis
GELOGW("RealPath is empty.");
return;
}
struct stat stat_buf;
if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) {
GELOGW("%s is not a dir.", real_path.c_str());
return;
}
struct dirent *dent(0);
DIR *dir = opendir(real_path.c_str());
// Plugin path does not exist
if (dir == nullptr) {
GELOGW("Open directory %s failed.", real_path.c_str());
return;
INT32 is_dir = mmIsDir(real_path.c_str());
// Lib plugin path not exist
if (is_dir != EN_OK) {
GELOGW("%s is not a dir.", real_path.c_str());
return;
}

while ((dent = readdir(dir)) != nullptr) {
if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue;
string name = dent->d_name;
string full_name = real_path + "/" + name;
const string so_suff = ".so";
const string caffe_parser_so_suff = "lib_caffe_parser.so";
const string aicpu_so_suff = "_aicpu.so";
const string aicpu_host_so_suff = "_online.so";
if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) {
ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff,
aicpu_host_so_suff);
} else {
FindParserSo(full_name, file_list, caffe_parser_path);
}
mmDirent **entries = nullptr;
auto ret = mmScandir(real_path.c_str(), &entries, nullptr, nullptr);
if (ret < EN_OK) {
GELOGW("scan dir failed. path = %s, ret = %d", real_path.c_str(), ret);
return;
}
for (int i = 0; i < ret; ++i) {
mmDirent *dent = entries[i];
if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue;
string name = dent->d_name;
string full_name = real_path + "/" + name;
const string so_suff = ".so";
const string caffe_parser_so_suff = "lib_caffe_parser.so";
const string aicpu_so_suff = "_aicpu.so";
const string aicpu_host_so_suff = "_online.so";
if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) {
ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff,
aicpu_host_so_suff);
} else {
FindParserSo(full_name, file_list, caffe_parser_path);
}
}
closedir(dir);
mmScandirFree(entries, ret);
}

void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) {
@@ -159,8 +160,9 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
fmk_type = ge::TypeUtils::FmkTypeToSerialString(type);
GELOGI("Framework type is %s.", fmk_type.c_str());

const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
char path_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("ASCEND_OPP_PATH", path_env, MMPA_MAX_PATH);
if (res == EN_OK) {
std::string path = path_env;
customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type);
GELOGI("Get custom so path from env : %s", path_env);
@@ -210,9 +212,11 @@ void TBEPluginManager::LoadPluginSo(const std::map<string, string> &options) {
for (auto elem : file_list) {
StringUtils::Trim(elem);

void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
void *handle = mmDlopen(elem.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL | MMPA_RTLD_NODELETE);
if (handle == nullptr) {
GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror());
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), error);
} else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) {
// Close dl when the program exist, not close here
GELOGI("Plugin load %s success.", elem.c_str());


+ 0
- 1
ge/common/ge/tbe_plugin_manager.h View File

@@ -17,7 +17,6 @@
#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_
#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_

#include <dlfcn.h>
#include <functional>
#include <iostream>
#include <map>


+ 4
- 2
ge/common/ge_common.mk View File

@@ -110,11 +110,12 @@ LOCAL_SHARED_LIBRARIES := \
libascend_protobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

LOCAL_STATIC_LIBRARIES += libmmpa

LOCAL_LDFLAGS := -lrt -ldl

include $(BUILD_HOST_SHARED_LIBRARY)
@@ -152,11 +153,12 @@ LOCAL_SHARED_LIBRARIES := \
libascend_protobuf \
libc_sec \
libslog \
libmmpa \
libgraph \
libregister \
liberror_manager \

LOCAL_STATIC_LIBRARIES += libmmpa

ifeq ($(device_os),android)
LOCAL_LDFLAGS += -ldl
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog


+ 2
- 4
ge/common/helper/model_cache_helper.cc View File

@@ -14,8 +14,6 @@
* limitations under the License.
*/

#include <fcntl.h>
#include <unistd.h>
#include <climits>
#include <cstdio>
#include <fstream>
@@ -448,12 +446,12 @@ Status ModelCacheHelper::SaveJsonToFile(const string &file_name, const Json &jso
}
const string path = cache_path_ + file_name;
const int FILE_AUTHORITY = 0600;
int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY);
int fd = mmOpen2(path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, FILE_AUTHORITY);
if (fd < 0) {
GELOGW("Fail to open the file: %s.", path.c_str());
return INTERNAL_ERROR;
}
if (close(fd) != 0) {
if (mmClose(fd) != 0) {
GELOGW("Fail to close the file: %s.", path.c_str());
return INTERNAL_ERROR;
}


+ 14
- 17
ge/common/helper/model_helper.cc View File

@@ -98,7 +98,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod

ge::Buffer model_buffer;
(void)model_tmp->Save(model_buffer);
GELOGI("MODEL_DEF size is %zu", model_buffer.GetSize());
GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize());
if (model_buffer.GetSize() > 0) {
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(),
model_buffer.GetSize()) != SUCCESS) {
@@ -107,7 +107,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
}
}
auto ge_model_weight = ge_model->GetWeight();
GELOGI("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData());
// weight is not necessary
if (ge_model_weight.GetSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
@@ -117,7 +117,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
}

TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore();
GELOGI("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize());
GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize());
if (tbe_kernel_store.DataSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
ModelPartitionType::TBE_KERNELS,
@@ -129,7 +129,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
(void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize());

CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore();
GELOGI("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize());
GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize());
if (cust_aicpu_kernel_store.DataSize() > 0) {
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper,
ModelPartitionType::CUST_AICPU_KERNELS,
@@ -155,8 +155,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
}
(void)model_task_def->SerializePartialToArray(task_buffer.GetData(), static_cast<int>(partition_task_size));

GELOGI("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num());
GELOGI("TASK_INFO size is %zu", partition_task_size);
GELOGD("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num());
GELOGD("TASK_INFO size is %zu", partition_task_size);

if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(),
partition_task_size) != SUCCESS) {
@@ -168,7 +168,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
model_header.platform_type = ge_model->GetPlatformType();
model_header.om_ir_version = ge_model->GetVersion();
std::string platform_version = ge_model->GetPlatformVersion();
GELOGI("Platform version save: %s", platform_version.c_str());

errno_t err;
err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(),
@@ -178,7 +177,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
return MEMALLOC_FAILED;
}
string version = reinterpret_cast<char *>(model_header.platform_version);
GELOGI("Platform version save: %s", version.c_str());
GELOGD("Platform version save: %s", version.c_str());

size_t name_size = ge_model->GetName().size();
name_size = name_size > (MODEL_NAME_LENGTH - 1) ? (MODEL_NAME_LENGTH - 1) : name_size;
@@ -188,7 +187,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod
return MEMALLOC_FAILED;
}
string model_name = reinterpret_cast<char *>(model_header.name);
GELOGI("Model name save:%s", model_name.c_str());
GELOGD("Model name save:%s", model_name.c_str());

Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_);
if (ret != SUCCESS) {
@@ -346,7 +345,7 @@ Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) {
ModelPartition partition_model_def;
// no need to check value, DATA->NetOutput
om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def);
GELOGI("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size);
GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size);

ge::Model model;
if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) {
@@ -376,7 +375,7 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) {
ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size);
model_->SetWeight(weight);

GELOGI("GetWeight size:%u", partition.size);
GELOGD("GetWeight size:%u", partition.size);
return SUCCESS;
}

@@ -393,7 +392,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
return INTERNAL_ERROR;
}
GELOGI("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num());
}
model_->SetModelTaskDef(task);
return SUCCESS;
@@ -404,9 +403,9 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) {
ModelPartition partition_kernel_def;
TBEKernelStore kernel_store;
if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def) == SUCCESS) {
GELOGI("Kernels partition size:%u", partition_kernel_def.size);
GELOGD("Kernels partition size:%u", partition_kernel_def.size);
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) {
GELOGI("Load tbe kernels success");
GELOGD("Load tbe kernels success");
} else {
GELOGW("Load tbe kernels failed");
}
@@ -420,11 +419,9 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) {
ModelPartition partition_kernel_def;
CustAICPUKernelStore kernel_store;
if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) {
GELOGI("Kernels partition size:%u", partition_kernel_def.size);
GELOGD("Kernels partition size:%u", partition_kernel_def.size);
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) {
GELOGI("Load cust aicpu kernels success");
} else {
GELOGW("Load cust aicpu kernels failed");
}
}
model_->SetCustAICPUKernelStore(kernel_store);


+ 6
- 6
ge/common/helper/om_file_helper.cc View File

@@ -123,7 +123,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
return ACL_ERROR_GE_EXEC_MODEL_PARTITION_NUM_INVALID;
}
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
partition_table->num, sizeof(ModelFileHeader), mem_offset);
if (model_data_size <= mem_offset) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u",
@@ -143,7 +143,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID;
}
mem_offset += partition.size;
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);
GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);
}
return SUCCESS;
}
@@ -167,7 +167,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave
ModelPartition partition = context_.partition_datas_[i];
partition_table->partition[i] = {partition.type, mem_offset, partition.size};
mem_offset += partition.size;
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);
GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size);
}
return partition_table;
}
@@ -191,7 +191,7 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp
(void)save_param.pri_key_file;
Status ret = SaveModelToFile(output_file, model, is_offline);
if (ret == SUCCESS) {
GELOGI("Generate model with encrypt.");
GELOGD("Generate model with encrypt.");
}
return ret;
}
@@ -213,7 +213,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat
FMK_UINT32_ADDCHECK(size_of_table, model_data_len)
model_header_.length = size_of_table + model_data_len;

GELOGI("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu",
GELOGD("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu",
sizeof(ModelFileHeader), size_of_table, model_data_len, model_header_.length + sizeof(ModelFileHeader));

std::vector<ModelPartition> partition_datas = context_.partition_datas_;
@@ -224,7 +224,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat
ret = FileSaver::SaveToBuffWithFileHeader(model_header_, *partition_table, partition_datas, model);
}
if (ret == SUCCESS) {
GELOGI("Save model success without encrypt.");
GELOGD("Save model success without encrypt.");
}
return ret;
#else


+ 2
- 2
ge/common/kernel_store.cc View File

@@ -51,7 +51,7 @@ bool KernelStore::Build() {
kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length());
kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize());

GELOGI("get kernel bin name %s, addr %p, size %u",
GELOGD("get kernel bin name %s, addr %p, size %u",
kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize());
mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head));
GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false);
@@ -95,7 +95,7 @@ bool KernelStore::Load(const uint8_t *data, const size_t &len) {
std::string name(next_buffer, kernel_head->name_len);

next_buffer += kernel_head->name_len;
GELOGI("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len);
GELOGD("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len);
std::vector<char> kernel_bin(next_buffer, next_buffer + kernel_head->bin_len);
KernelBinPtr teb_kernel_ptr = ge::MakeShared<KernelBin>(name, std::move(kernel_bin));
if (teb_kernel_ptr != nullptr) {


+ 1
- 2
ge/common/model_parser/base.cc View File

@@ -17,7 +17,6 @@
#include "common/model_parser/base.h"
#include "common/helper/model_helper.h"
#include <securec.h>
#include <sys/sysinfo.h>
#include <fstream>
#include <memory>
#include <string>
@@ -107,7 +106,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo

model_data = data;
model_len = file_header->length;
GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader));
GELOGD("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader));
} else {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported.");
res = ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION;


+ 6
- 8
ge/common/model_saver.cc View File

@@ -16,9 +16,7 @@

#include "common/model_saver.h"

#include <fcntl.h>
#include <securec.h>
#include <unistd.h>
#include <cstdlib>
#include <fstream>
#include <string>
@@ -51,14 +49,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
return FAILED;
}

char real_path[PATH_MAX] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= PATH_MAX, return FAILED, "file path is too long!");
GE_IF_BOOL_EXEC(realpath(file_path, real_path) == nullptr,
char real_path[MMPA_MAX_PATH] = {0};
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= MMPA_MAX_PATH, return FAILED, "file path is too long!");
GE_IF_BOOL_EXEC(mmRealPath(file_path, real_path, MMPA_MAX_PATH) != EN_OK,
GELOGI("File %s does not exit, it will be created.", file_path));

// Open file
mode_t mode = S_IRUSR | S_IWUSR;
int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode);
mmMode_t mode = M_IRUSR | M_IWUSR;
int32_t fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode);
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) {
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file_path, strerror(errno)});
GELOGE(FAILED, "Open file[%s] failed. %s", file_path, strerror(errno));
@@ -72,7 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi
ErrorManager::GetInstance().ATCReportErrMessage(
"E19004", {"file", "errmsg"}, {file_path, strerror(errno)});
// Need to both print the error info of mmWrite and mmClose, so return ret after mmClose
GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno));
GELOGE(FAILED, "Write to file failed. errno = %ld, %s", mmpa_ret, strerror(errno));
ret = FAILED;
}
// Close file


+ 16
- 12
ge/common/profiling/profiling_manager.cc View File

@@ -214,8 +214,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Pa
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) {
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling support two ways: env and front end
const char *profiling_mode = std::getenv("PROFILING_MODE");
const char *prof_options = std::getenv("PROFILING_OPTIONS");
char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 };
char prof_options_temp[MMPA_MAX_PATH] = { 0x00 };
(void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH );
const char *profiling_mode = profiling_mode_temp;
const char *prof_options = prof_options_temp;
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) {
is_load_profiling_ = false;
is_execute_profiling_ = false;
@@ -554,7 +558,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
return;
}
GELOGI("current logic_device_id:%d", logic_device_id);
GELOGD("current logic_device_id:%d", logic_device_id);
if (check_device) {
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (ret == device_id_.end()) {
@@ -562,11 +566,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr
return;
}
}
GELOGI("start ProfilingTaskDescInfo.");
GELOGD("start ProfilingTaskDescInfo.");
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id);
GELOGI("start ProfilingGraphDescInfo.");
GELOGD("start ProfilingGraphDescInfo.");
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id);
GELOGI("Report profiling data for GE end.");
GELOGD("Report profiling data for GE end.");
#endif
}

@@ -855,7 +859,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num);
GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num);

rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
@@ -874,7 +878,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGW("Prof start: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStart, module, device_list);
GELOGI("Prof start profiling success.");
GELOGD("Prof start profiling success.");
#endif
return SUCCESS;
}
@@ -897,7 +901,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]);
}
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num);
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
@@ -917,7 +921,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGW("Prof stop: load model module is invalid.");
}
UpdateDeviceIdModuleMap(kProfStop, module, device_list);
GELOGI("Prof stop profiling success.");
GELOGD("Prof stop profiling success.");
#endif
return SUCCESS;
}
@@ -959,14 +963,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id);
}
GELOGI("Current logic_device_id:%d", logic_device_id);
GELOGD("Current logic_device_id:%d", logic_device_id);

bool execute_model_prof_on = false;
auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id);
if (iter != device_id_.end()) {
execute_model_prof_on = true;
}
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
return is_execute_profiling_ || execute_model_prof_on;
}



+ 5
- 4
ge/common/properties_manager.h View File

@@ -25,13 +25,14 @@

#include "common/dump/dump_properties.h"
#include "graph/op_desc.h"
#include "common/ge_compiler_options.h"

namespace ge {
// Configuration property management
static const char *SYSMODE __attribute__((unused)) = "FMK_SYSMODE";
static const char *USE_FUSION __attribute__((unused)) = "FMK_USE_FUSION";
static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE";
static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG";
static const char *SYSMODE GE_ATTRIBUTE_UNUSED = "FMK_SYSMODE";
static const char *USE_FUSION GE_ATTRIBUTE_UNUSED = "FMK_USE_FUSION";
static const char *TIMESTAT_ENABLE GE_ATTRIBUTE_UNUSED = "DAVINCI_TIMESTAT_ENABLE";
static const char *ANNDROID_DEBUG GE_ATTRIBUTE_UNUSED = "ANNDROID_DEBUG";

class PropertiesManager {
public:


+ 56
- 29
ge/common/util.cc View File

@@ -16,11 +16,12 @@

#include "framework/common/util.h"

#include <fcntl.h>
#include <sys/stat.h>
#ifdef __GNUC__
#include <regex.h>
#include <unistd.h>
#else
#include <regex>
#endif
#include <algorithm>
#include <climits>
#include <cstdlib>
@@ -208,29 +209,30 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::string &directory_path) {
GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty.");
auto dir_path_len = directory_path.length();
if (dir_path_len >= PATH_MAX) {
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"},
{directory_path, std::to_string(PATH_MAX)});
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX);
if (dir_path_len >= MMPA_MAX_PATH) {
ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)});
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH);
return -1;
}
char tmp_dir_path[PATH_MAX] = {0};
char tmp_dir_path[MMPA_MAX_PATH] = {0};
for (size_t i = 0; i < dir_path_len; i++) {
tmp_dir_path[i] = directory_path[i];
if ((tmp_dir_path[i] == '\\') || (tmp_dir_path[i] == '/')) {
if (access(tmp_dir_path, F_OK) != 0) {
int32_t ret = mmMkdir(tmp_dir_path, S_IRUSR | S_IWUSR | S_IXUSR); // 700
if (mmAccess2(tmp_dir_path, M_F_OK) != EN_OK) {
int32_t ret = mmMkdir(tmp_dir_path, M_IRUSR | M_IWUSR | M_IXUSR); // 700
if (ret != 0) {
if (errno != EEXIST) {
ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path});
GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str());
GELOGW("Can not create directory %s. Make sure the directory exists and writable.",
directory_path.c_str());
return ret;
}
}
}
}
}
int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR); // 700
int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), M_IRUSR | M_IWUSR | M_IXUSR); // 700
if (ret != 0) {
if (errno != EEXIST) {
ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path});
@@ -305,9 +307,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha
}

// Returns the current time of day as a single microsecond count, combining the
// seconds and microseconds fields filled in by the time-of-day call below.
//
// Reading note: this listing is a diff hunk (-305,9 +307,9) rendered without
// +/- markers. The first tv / ret / GE_LOGE_IF triple is the removed
// gettimeofday() version; the second triple is the mmGetTimeOfDay() version
// that replaces it. Only one of the two triples exists in either revision.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() {
// Removed side: direct POSIX call, success checked against 0.
struct timeval tv {};
int ret = gettimeofday(&tv, nullptr);
GE_LOGE_IF(ret != 0, "Func gettimeofday may failed: ret=%d", ret);
// Added side: same call through the mmpa wrapper, success checked against EN_OK.
// The log text still names gettimeofday.
mmTimeval tv {};
int ret = mmGetTimeOfDay(&tv, nullptr);
GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret);
// There is no early return on failure, so (assuming GE_LOGE_IF only logs) the
// result is still computed from tv, which `{}` value-initialised.
// NOTE(review): the multiply and add happen in tv's own field types, before the
// cast to uint64_t. If those fields are 32-bit on some target, tv_sec * 1000000
// would overflow -- confirm the field widths of mmTimeval.
auto total_use_time = tv.tv_usec + tv.tv_sec * 1000000; // 1000000: seconds to microseconds
return static_cast<uint64_t>(total_use_time);
}
@@ -347,16 +349,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6

FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) {
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL.");
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
strlen(path) >= PATH_MAX,
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)});
return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH,
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH);

// Nullptr is returned when the path does not exist or there is no permission
// Return absolute path when path is accessible
std::string res;
char resolved_path[PATH_MAX] = {0};
if (realpath(path, resolved_path) != nullptr) {
char resolved_path[MMPA_MAX_PATH] = {0};
if (mmRealPath(path, resolved_path, MMPA_MAX_PATH) == EN_OK) {
res = resolved_path;
}

@@ -383,7 +384,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
// A regular matching expression to verify the validity of the input file path
// Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores
// File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.)
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#ifdef __GNUC__
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#else
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
#endif

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ValidateStr(real_path, mode),
@@ -392,7 +397,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason);

// The absolute path points to a file that is not readable
if (access(real_path.c_str(), R_OK) != 0) {
if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) {
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"file", "errmsg"}, {file_path.c_str(), strerror(errno)});
GELOGW("Read file[%s] failed, errmsg[%s]", file_path.c_str(), strerror(errno));
return false;
@@ -410,15 +415,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
return false;
}

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)});
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX);
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH,
ErrorManager::GetInstance().ATCReportErrMessage(
"E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)});
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH);

// A regular matching expression to verify the validity of the input file path
// Path section: Support upper and lower case letters, numbers dots(.) chinese and underscores
// File name section: Support upper and lower case letters, numbers, underscores chinese and dots(.)
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#ifdef __GNUC__
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$";
#else
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$";
#endif

GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
!ValidateStr(file_path, mode),
@@ -430,7 +439,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
// Can get absolute path (file exists)
if (!real_path.empty()) {
// File is not readable or writable
if (access(real_path.c_str(), W_OK | F_OK) != 0) {
if (mmAccess2(real_path.c_str(), M_W_OK | M_F_OK) != EN_OK) {
ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {real_path, strerror(errno)});
GELOGW("Write file[%s] failed, errmsg[%s]", real_path.c_str(), strerror(errno));
return false;
@@ -461,6 +470,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const
}

FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode) {
#ifdef __GNUC__
char ebuff[kMaxBuffSize];
regex_t reg;
int cflags = REG_EXTENDED | REG_NOSUB;
@@ -482,6 +492,23 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str

regfree(&reg);
return true;
#else
std::wstring wstr(str.begin(), str.end());
std::wstring wmode(mode.begin(), mode.end());
std::wsmatch match;
bool res = false;

try {
std::wregex reg(wmode, std::regex::icase);
// Matching string part
res = regex_match(wstr, match, reg);
res = regex_search(str, std::regex("[`!@#$%^&*()|{}';',<>?]"));
} catch (std::exception &ex) {
GELOGW("The directory %s is invalid, error: %s.", str.c_str(), ex.what());
return false;
}
return !(res) && (str.size() == match.str().size());
#endif
}

FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) {


+ 1
- 1
ge/engine_manager/dnnengine_manager.cc View File

@@ -433,7 +433,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h
return FAILED;
}
const char *file = file_path.data();
if ((access(file, F_OK)) == -1) {
if ((mmAccess2(file, M_F_OK)) != EN_OK) {
if (engines_map_.size() != 0) {
GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno));
return FAILED;


+ 1
- 15
ge/executor/ge_executor.cc View File

@@ -588,7 +588,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
}

Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGI("unload model %u begin.", model_id);
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -630,7 +630,6 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData
// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
GELOGI("get model desc info begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -665,7 +664,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats);
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats);

GELOGI("get model desc info end.");
return ge::SUCCESS;
}

@@ -679,7 +677,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
///
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
int32_t &dynamic_type) {
GELOGI("Begin to get dynamic batch info.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -690,8 +687,6 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto
GELOGE(ret, "GetDynamicBatchInfo failed.");
return ret;
}

GELOGI("Get dynamic batch info succ.");
return SUCCESS;
}

@@ -727,7 +722,6 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64
/// @return execute result
///
Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) {
GELOGI("Begin to get user designate shape info.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -739,7 +733,6 @@ Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string>
return ret;
}

GELOGI("Get user designate shape order succ.");
return SUCCESS;
}

@@ -782,7 +775,6 @@ Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType
}

Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) {
GELOGI("Begin to get dynamic batch output shape info");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -792,8 +784,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn
GELOGE(ret, "Get dynamic batch output shape info failed.");
return ret;
}

GELOGI("Get dynamic batch output shape info succ.");
return SUCCESS;
}

@@ -835,8 +825,6 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge
}

Status GeExecutor::CommandHandle(const Command &command) {
GELOGI("command handle begin.");

Status ret = GraphLoader::CommandHandle(command);
if (ret != SUCCESS) {
GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed.");
@@ -904,7 +892,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da
*/
Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size,
void *weight_ptr, size_t weight_size) {
GELOGI("Load model from data begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -945,7 +932,6 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
*/
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
ge::RunModelData &run_output_data, bool async_mode) {
GELOGI("Execute model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;


+ 2
- 2
ge/ge_inference.mk View File

@@ -387,12 +387,12 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES)
LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES)

LOCAL_STATIC_LIBRARIES := libge_memory \
libmmpa \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libascend_protobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \
@@ -451,12 +451,12 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)
LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES)

LOCAL_STATIC_LIBRARIES := libge_memory \
libmmpa \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libascend_protobuf \
libslog \
libmmpa \
libgraph \
libregister \
libresource \


+ 1
- 1
ge/ge_local_engine/engine/host_cpu_engine.cc View File

@@ -221,7 +221,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc,
Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc);
auto ret = op_kernel.Compute(op, named_inputs, named_outputs);
if (ret != GRAPH_SUCCESS) {
GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret);
GELOGW("Failed to compute host cpu op. node = %s", op_desc->GetName().c_str());
return FAILED;
}
op.BreakConnect();


+ 19
- 0
ge/ge_local_engine/module.mk View File

@@ -88,6 +88,25 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for device libge_local_opskernel_builder.so
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libregister \
libgraph

LOCAL_SRC_FILES := $(ops_kernel_builder_src_files)

LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_SHARED_LIBRARY}

#compiler for libge_local_opskernel_builder.so in atc
include $(CLEAR_VARS)


+ 2
- 2
ge/ge_local_engine/ops_kernel_store/ge_local_ops_kernel_builder.cc View File

@@ -111,7 +111,7 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) {
TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size);
return FAILED;
}
GELOGI(
GELOGD(
"Calc op[%s:%s] out[%zu] mem size is %ld,"
" format=%s, data_type=%s.",
node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(),
@@ -174,7 +174,7 @@ Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &conte
GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str());
return ret;
}
GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size());
GELOGD("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size());
return ret;
}
} // namespace ge_local


+ 1
- 1
ge/ge_local_engine/ops_kernel_store/op/no_op.cc View File

@@ -24,7 +24,7 @@ namespace ge_local {
NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {}

Status NoOp::Run() {
GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str());
GELOGD("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str());
// Do nothing
return SUCCESS;
}


+ 1
- 1
ge/ge_runner.mk View File

@@ -372,12 +372,12 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES)
LOCAL_STATIC_LIBRARIES := libge_memory \
libadump_server \
libmsprofiler \
libmmpa \

LOCAL_SHARED_LIBRARIES := \
libc_sec \
libascend_protobuf \
libslog \
libmmpa \
libgraph \
libregister \
libge_common \


+ 167
- 57
ge/ge_runtime/task/hccl_task.cc View File

@@ -15,83 +15,56 @@
*/

#include "ge_runtime/task/hccl_task.h"
#include <algorithm>
#include "ge_runtime/task/task_factory.h"
#include "common/opskernel/ops_kernel_info_store.h"
#include "common/opskernel/ge_task_info.h"

namespace ge {
namespace model_runner {
std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<HcclTask::StreamGuard>>>>
HcclTask::model_stream_mapping_;
std::mutex HcclTask::model_stream_mapping_mutex_;

HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info)
: TaskRepeater<HcclTaskInfo>(model_context, task_info),
task_info_(task_info),
stream_(nullptr),
workspace_mem_(nullptr),
rt_model_handle_(nullptr),
priority_(0),
slave_stream_list_(),
hcom_bind_model_(nullptr),
hcom_unbind_model_(nullptr),
hcom_distribute_task_(nullptr) {
secondary_stream_list_() {
if (task_info_ == nullptr) {
GELOGW("task_info_ is null!");
}

hcom_bind_model_ = task_info->hcom_bind_model();
hcom_unbind_model_ = task_info->hcom_unbind_model();

priority_ = model_context.priority();
rt_model_handle_ = model_context.rt_model_handle();
auto stream_list = model_context.stream_list();

if (hcom_bind_model_ != nullptr) {
if (rt_model_handle_list_.insert(rt_model_handle_).second) {
for (auto stream : stream_list) {
(void)hcom_bind_model_(rt_model_handle_, stream);
}
}
}

if (stream_list.size() == 1) {
stream_ = stream_list[0];
} else if (stream_list.size() > task_info->stream_id()) {
stream_ = stream_list[task_info->stream_id()];
} else {
GELOGW("index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size());
GELOGW("Index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size());
}
}

HcclTask::~HcclTask() {
for (size_t i = 0; i < slave_stream_list_.size(); ++i) {
rtError_t rt_ret = rtModelUnbindStream(rt_model_handle_, slave_stream_list_[i]);
if (workspace_mem_ != nullptr) {
rtError_t rt_ret = rtFree(workspace_mem_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Unbind stream from model failed! Index: %zu", i);
}
}

for (size_t i = 0; i < slave_stream_list_.size(); ++i) {
rtError_t rt_ret = rtStreamDestroy(slave_stream_list_[i]);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Destroy stream failed! Index: %zu", i);
}
}

if (hcom_unbind_model_ != nullptr) {
if (rt_model_handle_list_.find(rt_model_handle_) != rt_model_handle_list_.end()) {
(void)hcom_unbind_model_(rt_model_handle_);
(void)rt_model_handle_list_.erase(rt_model_handle_);
GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret);
}
workspace_mem_ = nullptr;
}
}

bool HcclTask::Distribute() {
// No ops kernel info store
hcom_distribute_task_ = task_info_->hcom_distribute_task();
if (hcom_distribute_task_ != nullptr) {
return hcom_distribute_task_(task_info_, stream_);
}

// Ops kernel info store
// Get privateDef and opsKernelStorePtr
GELOGI("get custom info in modelTaskDef");
GELOGI("Get custom info in modelTaskDef");
void *ops_kernel_store = task_info_->ops_kernel_store();
OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store);
if (ops_kernel_store == nullptr) {
@@ -101,25 +74,15 @@ bool HcclTask::Distribute() {

char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data()));
auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size());
GELOGI("the first address of the custom info, privateDef=%p", private_def);

GELOGI("hcclStreamNum =%ld", task_info_->hccl_stream_num());
for (int64_t i = 0; i < task_info_->hccl_stream_num(); ++i) {
rtStream_t stream = nullptr;
rtError_t rt_ret = rtStreamCreateWithFlags(&stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}
GELOGI("The first address of the custom info, privateDef=%p", private_def);
SetSecondaryStream();

rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM);
if (task_info_->workspace_size() > 0) {
rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return false;
}

GELOGI("hccl_stream addr is=%p", stream);
slave_stream_list_.push_back(stream);
}

GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl.");
@@ -128,17 +91,22 @@ bool HcclTask::Distribute() {
ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL);
ge_task.stream = stream_;

ge_task.kernelHcclInfo = std::vector<GETaskKernelHcclInfo>(1);
ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type();
ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr();
ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr();
ge_task.kernelHcclInfo[0].workSpaceAddr = task_info_->workspace_addr();
ge_task.kernelHcclInfo[0].workSpaceAddr = workspace_mem_;
ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size();
ge_task.kernelHcclInfo[0].count = task_info_->count();
ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type());
ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type());
ge_task.kernelHcclInfo[0].rootId = task_info_->root_id();

ge_task.kernelHcclInfo[0].hcclStreamList = slave_stream_list_;
std::vector<rtStream_t> secondary_stream_list;
std::transform(secondary_stream_list_.begin(), secondary_stream_list_.end(),
std::back_inserter(secondary_stream_list),
[](const std::shared_ptr<StreamGuard> &stream) -> rtStream_t { return stream->GetStream(); });
ge_task.kernelHcclInfo[0].hcclStreamList = secondary_stream_list;

ge_task.privateDef = private_def;
ge_task.privateDefLen = private_def_len;
@@ -151,10 +119,152 @@ bool HcclTask::Distribute() {
return false;
}

GELOGI("call function LoadTask end.");
GELOGI("Call function LoadTask end.");
return true;
}

bool HcclTask::SetSecondaryStream() {
const uint32_t master_stream_id = task_info_->stream_id();
const int64_t hccl_secondary_stream_num = task_info_->hccl_stream_num();
Status ret;
std::lock_guard<std::mutex> lock(model_stream_mapping_mutex_);
if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) {
GELOGI("Need to create map for rt_model_handle_:%p with new mainstream %ld.", rt_model_handle_, master_stream_id);
ret = CreateStream(hccl_secondary_stream_num, master_stream_id);
if (!ret) {
GELOGE(RT_FAILED, "Create hccl stream failed.");
return false;
}
return true;
}

std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map =
model_stream_mapping_.at(rt_model_handle_);
auto iter = master_secondary_stream_map.find(master_stream_id);
if (iter != master_secondary_stream_map.end()) {
std::vector<std::weak_ptr<StreamGuard>> &secondary_stream_vec = iter->second;
auto lock_weak_ptr = [&secondary_stream_vec, this](int64_t index) -> bool {
auto stream = secondary_stream_vec[index].lock();
if (stream == nullptr) {
rtStream_t new_stream = nullptr;
bool ret = CreateStream(rt_model_handle_, &new_stream);
if (!ret) {
GELOGE(FAILED, "CreateStream failed.");
return false;
}
stream = std::make_shared<HcclTask::StreamGuard>(rt_model_handle_, new_stream);
if (stream == nullptr) {
GELOGE(FAILED, "MakeShared failed.");
return false;
}
secondary_stream_vec[index] = stream;
}
secondary_stream_list_.push_back(stream);
return true;
};

if (static_cast<size_t>(hccl_secondary_stream_num) <= secondary_stream_vec.size()) {
GELOGI("Number of secondary stream is enough to be reused.");
for (int64_t i = 0; i < hccl_secondary_stream_num; ++i) {
if (!lock_weak_ptr(i)) {
GELOGE(FAILED, "Lock weak ptr failed.");
return false;
}
}
} else {
GELOGI("Need to reuse secondary stream and create new secondary stream.");
size_t created_stream_num = secondary_stream_vec.size();
for (size_t i = 0; i < secondary_stream_vec.size(); ++i) {
if (!lock_weak_ptr(i)) {
GELOGE(FAILED, "Lock weak ptr failed.");
return false;
}
}
ret = CreateStream(hccl_secondary_stream_num - created_stream_num, master_stream_id);
if (ret != SUCCESS) {
GELOGE(RT_FAILED, "Create hccl stream failed.");
return false;
}
}
GELOGI("Initialize hccl secondary stream success, hccl_secondary_stream_num =%ld", hccl_secondary_stream_num);
} else {
GELOGI("Need to create secondary stream for %s with new mainstream %ld.", task_info_->op_name().c_str(),
master_stream_id);
ret = CreateStream(hccl_secondary_stream_num, master_stream_id);
if (!ret) {
GELOGE(RT_FAILED, "Create hccl stream failed.");
return false;
}
}
return true;
}

bool HcclTask::CreateStream(int64_t stream_num, int64_t master_stream_id) {
GELOGI("Start to create %ld hccl secondary stream.", stream_num);
for (int64_t i = 0; i < stream_num; ++i) {
rtStream_t stream = nullptr;
bool ret = CreateStream(rt_model_handle_, &stream);
if (!ret) {
GELOGE(FAILED, "CreateStream failed.");
return false;
}

GELOGD("hccl_stream addr is=%p", stream);
auto shared_stream = std::make_shared<StreamGuard>(rt_model_handle_, stream);
if (shared_stream == nullptr) {
GELOGE(FAILED, "MakeShared failed.");
return false;
}
SaveHcclSecondaryStream(master_stream_id, shared_stream);
secondary_stream_list_.push_back(shared_stream);
}
GELOGI("CreateStream success.");
return true;
}

/// @brief Create one runtime stream and bind it to `model`.
///
/// @param model   Model handle the new stream is bound to.
/// @param stream  Output: receives the created stream. Must not be null. If binding fails the
///                stream is destroyed again and `*stream` is reset to nullptr.
/// @return true on success, false if `stream` is null or a runtime call failed.
bool HcclTask::CreateStream(rtModel_t model, rtStream_t *stream) const {
  if (stream == nullptr) {
    GELOGE(FAILED, "Output param stream is null.");
    return false;
  }

  rtError_t rt_ret = rtStreamCreateWithFlags(stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }
  // Create secondary stream, inactive by default, activated by hccl
  rt_ret = rtModelBindStream(model, *stream, RT_MODEL_WAIT_ACTIVE_STREAM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    // Callers drop the handle when this function fails, so release the unbound stream here
    // instead of leaking it.
    const rtError_t destroy_ret = rtStreamDestroy(*stream);
    if (destroy_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Destroy stream failed!");
    }
    *stream = nullptr;
    return false;
  }
  return true;
}

/// @brief Record `stream` as a secondary stream of `master_stream_id` for the current model.
///
/// The entry is stored as a weak_ptr in model_stream_mapping_, creating the per-model and
/// per-master-stream containers on first use. This function does not lock
/// model_stream_mapping_mutex_ itself; the visible call chain
/// (SetSecondaryStream -> CreateStream) already holds it.
///
/// @param master_stream_id  Master stream id; narrowed to the map's uint32_t key type.
/// @param stream            Stream guard to record.
void HcclTask::SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream) {
  // operator[] default-constructs the missing inner map / vector, just like an explicit find + emplace.
  auto &streams_of_model = model_stream_mapping_[rt_model_handle_];
  streams_of_model[static_cast<uint32_t>(master_stream_id)].emplace_back(stream);
}

/// @brief Unbind the guarded stream from its model and then destroy it.
///
/// If unbinding fails, the error is logged and the stream is not destroyed.
HcclTask::StreamGuard::~StreamGuard() {
  if (rtModelUnbindStream(model_, stream_) != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Unbind stream from model failed!");
    return;
  }

  if (rtStreamDestroy(stream_) != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Destroy stream failed!");
  }
}

REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo);
} // namespace model_runner
} // namespace ge

+ 24
- 6
ge/ge_runtime/task/hccl_task.h View File

@@ -19,7 +19,9 @@

#include <memory>
#include <set>
#include <map>
#include <vector>
#include <mutex>
#include "ge_runtime/task/task.h"

namespace ge {
@@ -33,18 +35,34 @@ class HcclTask : public TaskRepeater<HcclTaskInfo> {
bool Distribute() override;

private:
class StreamGuard;
bool SetSecondaryStream();
bool CreateStream(int64_t stream_num, int64_t master_stream_id);
bool CreateStream(rtModel_t model, rtStream_t *stream) const;
void SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream);

std::shared_ptr<HcclTaskInfo> task_info_;
void *stream_;
void *workspace_mem_;
rtModel_t rt_model_handle_;
int32_t priority_;
std::vector<void *> slave_stream_list_;
std::function<bool(void *, void *)> hcom_bind_model_;
std::function<bool(void *)> hcom_unbind_model_;
std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_;
static std::set<rtModel_t> rt_model_handle_list_;
std::vector<std::shared_ptr<StreamGuard>> secondary_stream_list_;
// map<key: model pointer, value: map<key: primary stream id, value: vector<secondary stream pointer>>>
static std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>>> model_stream_mapping_;
static std::mutex model_stream_mapping_mutex_;
};

std::set<rtModel_t> HcclTask::rt_model_handle_list_{};
/// @brief Owns one secondary stream together with the model handle it is bound to.
///
/// The out-of-line destructor unbinds the stream from the model and destroys it, so an
/// instance must be the only owner of its stream. Instances are shared through
/// std::shared_ptr / std::weak_ptr rather than copied.
class HcclTask::StreamGuard {
 public:
  StreamGuard(rtModel_t model, rtStream_t stream) : model_(model), stream_(stream) {}
  ~StreamGuard();

  // A copy would unbind and destroy the same stream a second time, so copying is disabled.
  StreamGuard(const StreamGuard &) = delete;
  StreamGuard &operator=(const StreamGuard &) = delete;

  /// @return The guarded stream handle; ownership stays with this guard.
  rtStream_t GetStream() const { return stream_; }

 private:
  rtModel_t model_;    // model the stream is bound to
  rtStream_t stream_;  // guarded stream
};
} // namespace model_runner
} // namespace ge



+ 1
- 9
ge/generator/ge_generator.cc View File

@@ -200,7 +200,6 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
}

static void GetOpsProtoPath(string &opsproto_path) {
GELOGI("Start to get ops proto path schedule.");
const char *path_env = std::getenv("ASCEND_OPP_PATH");
if (path_env != nullptr) {
string path = path_env;
@@ -383,7 +382,6 @@ bool GeGenerator::Impl::ParseVersion(const std::string &line, std::string &versi
}

version = temp.substr(pos + flag.size());
GELOGI("Version=%s", version.c_str());

return true;
}
@@ -425,7 +423,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) {
path_base = path_base.substr(0, path_base.rfind('/') + 1);

std::string version_path = path_base + "version.info";
GELOGI("version_path is %s", version_path.c_str());
std::string version;
if (!GetVersionFromPath(version_path, version)) {
GELOGW("Get atc version information failed!");
@@ -436,7 +433,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) {
GELOGW("Ge model set atc version failed!");
return false;
}
GELOGI("Ge model set atc version information success.");
return true;
}

@@ -449,7 +445,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
}
std::string version_path = path_env;
version_path += "/version.info";
GELOGI("version_path is %s", version_path.c_str());
std::string version;
if (!GetVersionFromPath(version_path, version)) {
GELOGW("Get opp version information failed!");
@@ -460,7 +455,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) {
GELOGW("Ge model set opp version failed!");
return false;
}
GELOGI("Ge Model set opp version information success.");
return true;
}

@@ -469,7 +463,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
rtContext_t ctx = nullptr;
auto rt = rtCtxGetCurrent(&ctx);
if (rt != RT_ERROR_NONE) {
GELOGW("Current ctx is null.");
GELOGD("Current ctx is null.");
ctx = nullptr;
}

@@ -524,7 +518,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
(void)rtCtxSetCurrent(ctx);
}

GELOGI("GenerateOfflineModel success.");
return SUCCESS;
}

@@ -713,7 +706,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED;
}

GELOGI("Model inputs size is %zu", inputs.size());
graph_manager_.SetOptionsRunGraphFlag(false);

static std::atomic<uint64_t> atomic_session_id(0);


+ 2
- 6
ge/graph/build/graph_builder.cc View File

@@ -102,7 +102,6 @@ void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) {
}

Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {
GELOGI("Begin to calculate op running param.");
GE_CHECK_NOTNULL(graph);
auto instance_ptr = ge::GELib::GetInstance();
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) {
@@ -140,7 +139,6 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) {

auto parent_node = graph->GetParentNode();
if (parent_node == nullptr) {
GELOGI("Graph[%s] do not have parent node, no need update parent node output size.", graph->GetName().c_str());
return SUCCESS;
}

@@ -189,7 +187,6 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph

Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) {
GELOGI("Start to build model.");
if (comp_graph == nullptr) {
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null.");
return GE_GRAPH_PARAM_NULLPTR;
@@ -267,7 +264,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v
}
GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr),
"Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str());
GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str());
GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str());
GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build");
return SUCCESS;
}
@@ -306,7 +303,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo
}
GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr),
"Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str());
GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str());
GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str());
return SUCCESS;
}

@@ -542,7 +539,6 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc)
}

Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) {
GELOGI("[SecondPartition] second partition.");
GE_TIMESTAMP_START(GraphPartition2);
auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning);
if (ret != SUCCESS) {


+ 1
- 1
ge/graph/build/label_allocator.cc View File

@@ -33,7 +33,7 @@ Status LabelAllocator::AssignFunctionalLabels() {
}

// Add label task for sub graph.
GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str());
GELOGD("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str());
std::set<NodePtr> functional_nodes;
for (auto graph : compute_graph_->GetAllSubgraphs()) {
if (!CollectFunctionalNode(graph, functional_nodes)) {


+ 4
- 4
ge/graph/build/logical_stream_allocator.cc View File

@@ -597,10 +597,10 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
return status;
}

GELOGI("Subgraphs of graph %s:", graph->GetName().c_str());
GELOGD("Subgraphs of graph %s:", graph->GetName().c_str());
for (const auto &subgraph : subgraphs) {
if (subgraph != nullptr) {
GELOGI("subgraph: %s", subgraph->name.c_str());
GELOGD("subgraph: %s", subgraph->name.c_str());
}
}

@@ -664,9 +664,9 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec

Status status = pass->Run(graph, subgraphs, context_);
if (status == SUCCESS) {
GELOGI("Stream pass %s return SUCCESS.", pass->GetName().c_str());
GELOGD("Stream pass %s return SUCCESS.", pass->GetName().c_str());
} else if (status == NOT_CHANGED) {
GELOGI("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str());
GELOGD("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str());
} else {
GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str());
return status;


+ 2
- 2
ge/graph/build/memory/binary_block_mem_assigner.cc View File

@@ -76,7 +76,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
auto range_number = static_cast<size_t>(
ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase)));
range_number = (range_number == 0) ? 1 : range_number;
GELOGI("Range number: %zu", range_number);
GELOGD("Range number: %zu", range_number);

vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
@@ -114,7 +114,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
range_ceils.push_back(range.back());
}
}
GELOGI("Range ceils: %s", ToString(range_ceils).c_str());
GELOGD("Range ceils: %s", ToString(range_ceils).c_str());

return SUCCESS;
}


+ 9
- 10
ge/graph/build/memory/block_mem_assigner.cc View File

@@ -455,12 +455,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
GetNodeWorkSpaceSize(n, temp);
all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end());
}
GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_);
for (const auto &pair : symbol_size_) {
all_memory_size.emplace_back(pair.second);
}
sort(all_memory_size.begin(), all_memory_size.end());
GELOGI("All memory size: %s", ToString(all_memory_size).c_str());
GELOGD("All memory size: %s", ToString(all_memory_size).c_str());

for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
if (*iter == 0) {
@@ -495,7 +494,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) {

bool IsDirectOutputNode(const NodePtr &node, int idx) {
if ((node != nullptr) && (node->GetOpDesc() != nullptr) && (node->GetOpDesc()->GetType() == NETOUTPUT)) {
GELOGI("This is netoutput node, the input node mem can not be reused");
GELOGD("This is netoutput node, the input node mem can not be reused");
return true;
}
return false;
@@ -1102,7 +1101,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i
if (static_cast<uint32_t>(index) == output_index) {
if (node->GetOwnerComputeGraph() != nullptr) {
string graph_name = node->GetOwnerComputeGraph()->GetName();
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%d] streamid[%ld].", graph_name.c_str(),
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(),
op_desc->GetName().c_str(), index, op_desc->GetStreamId());
}
return true;
@@ -1219,7 +1218,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
int64_t stream_id = op_desc->GetStreamId();
vector<int64_t> memorys_type;
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type);
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(),
GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(),
op_desc->GetOutputsSize(), memorys_type.size());
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) {
GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]",
@@ -1257,7 +1256,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
// fusion: other type's size not means malloc HBM memory
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1;
if (l1_flag) {
GELOGI("fusion: node[%s], output[%s], output memory type [%d]",
GELOGI("fusion: node[%s], output[%s], output memory type [%ld]",
op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]);
size = 0;
}
@@ -1311,7 +1310,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector
///
void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_);
GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open");
GELOGD("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open");
string op_no_reuse_mem_str;
const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM);
GE_IF_BOOL_EXEC(op_no_reuse_mem != nullptr, op_no_reuse_mem_str = string(op_no_reuse_mem);
@@ -1337,7 +1336,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
vector<bool> workspace_reuse_flag;
GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag),
GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str()));
GELOGI("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(),
GELOGD("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(),
temp.size(), tvm_workspace_memory_type.size());

if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) {
@@ -1350,7 +1349,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) {
bool workspace_skip_flag = false;
if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) {
GELOGI(
"fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]",
"fusion: node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]",
node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]);
workspace_skip_flag = true;
}
@@ -1628,7 +1627,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
memory_block->SetTailOffset(p2p_mem_offset_ - 1);
}
}
GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.",
GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.",
mem_offset_, p2p_mem_offset_);
}



+ 5
- 8
ge/graph/build/memory/graph_mem_assigner.cc View File

@@ -117,7 +117,7 @@ Status GraphMemoryAssigner::AssignMemory() {
return ge::FAILED;
}
int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign;
GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);
GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign);

mem_assigner_ = std::move(mem_assigner);

@@ -296,7 +296,6 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse
mem_offset[RT_MEMORY_HBM] += memory_block->Size();
memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1);
}
GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]);

// set offset for zero copy nodes
priority_assigner->SetOpMemOffset(true);
@@ -309,14 +308,13 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse
}
iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM];

GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp,
zero_mem_copy_size);

return SUCCESS;
}

Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
GELOGI("Begin to reassign continuous memory");
Status ret;
for (auto &node : compute_graph_->GetAllNodes()) {
// Get the continuous input type of the node, default is false
@@ -387,7 +385,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) {
}
}
for (auto pair : memory_offset_) {
GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first,
pair.second.mem_offset_);
}
return ge::SUCCESS;
@@ -456,7 +454,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node,
output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE;
}
GELOGI(
"[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] "
"[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] "
"real_size[%u].",
node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(),
peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(),
@@ -834,7 +832,6 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt
string max_batch_label;
GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label),
"Get max batch label failed.");
GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str());
PrintMemoryOffset();
vector<size_t> nodes_mem_offset_list;
for (auto &i_map : mem_reuse_nodes_map) {
@@ -1507,7 +1504,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector<
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset));
}

GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]",
GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]",
has_mem_type_attr == true ? "Fusion" : "",
tmp_op_desc->GetName().c_str(),
valid_input_index,


+ 2
- 2
ge/graph/build/memory/hybrid_mem_assigner.cc View File

@@ -62,9 +62,9 @@ Status HybridMemAssigner::Assign() {

std::unique_ptr<BlockMemAssigner> priority_assigner;

GELOGI("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size);
GELOGD("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size);
if (bin_mem_size <= max_mem_size) {
GELOGI("Use binary-block memory assigner method");
GELOGD("Use binary-block memory assigner method");
priority_assigner = std::move(binary_assigner);
} else {
GELOGI("Use max-block memory assigner method");


+ 1
- 8
ge/graph/build/model_builder.cc View File

@@ -189,7 +189,6 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) {
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
const auto &src_node = peer_out_anchor->GetOwnerNode();
if (!NodeUtils::GetConstOpType(src_node, const_type)) {
GELOGI("Node %s:%zu, sorce node: %s Not Const", n->GetName().c_str(), index, src_node->GetName().c_str());
continue;
}

@@ -232,7 +231,6 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_

Status ModelBuilder::SetInputOutputDesc() {
Status ret;
GELOGI("Start to SetInputOutputDesc.");

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
auto node_op_desc = n->GetOpDesc();
@@ -245,7 +243,6 @@ Status ModelBuilder::SetInputOutputDesc() {
// final graph.
if ((GetLocalOmgContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) &&
((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) {
GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str());
auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr();
auto outputDescsPtr = node_op_desc->GetAllOutputsDescPtr();
ge::Format format = ge::FORMAT_ND;
@@ -290,7 +287,7 @@ void ModelBuilder::AddNodeInputProperty() {
vector<int64_t> src_index_list;
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr!"); continue);
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
GE_IF_BOOL_EXEC(node_op_desc->HasAttr(MERGE_PRENODE_FLAG), continue);

ge::NodePtr src_node = peer_out_anchor->GetOwnerNode();
@@ -347,7 +344,6 @@ void ModelBuilder::AddNodeInputProperty() {
}

Status ModelBuilder::AdjustInputTensorFlag() {
GELOGI("Start to AdjustInputTensorFlag.");
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) {
GELOGD("Data node: %s.", n->GetName().c_str());
@@ -441,7 +437,6 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) {
return FAILED);
const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_);
bool is_op_debug = dump_properties.IsOpDebugOpen();
GELOGI("Get op debug:%d", is_op_debug);
if (is_op_debug) {
if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) {
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed.");
@@ -608,7 +603,6 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) {
}
tbe_name_set.insert(tbe_kernel->GetName());
tbe_kernel_store_.AddTBEKernel(tbe_kernel);
GELOGI("Add tbe kernel bin %s", tbe_kernel->GetName().c_str());
}

for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) {
@@ -678,7 +672,6 @@ Status ModelBuilder::PreBuildModel() {
GELOGE(FAILED, "Graph_ is not valid.");
return FAILED;
}
GELOGI("BuildModel begin.");

GE_CHK_STATUS_RET(SetInputOutputDesc(), "SetInputOutputDesc Failed!");



+ 6
- 6
ge/graph/build/run_context.cc View File

@@ -140,7 +140,7 @@ void RunContextUtil::DestroyRtModelResources() noexcept {

Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer,
const uint64_t session_id) {
GELOGI("Begin to Create RunContext, session_id = %lu", session_id);
GELOGD("Begin to Create RunContext, session_id = %lu", session_id);
// check params
if (graph == nullptr) {
GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id);
@@ -152,21 +152,21 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra
GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id);
return INTERNAL_ERROR;
}
GELOGI("Stream_num = %u", stream_num);
GELOGD("Stream_num = %u", stream_num);

uint32_t event_num = 0;
if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) {
GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id);
return INTERNAL_ERROR;
}
GELOGI("Event_num = %u", event_num);
GELOGD("Event_num = %u", event_num);

uint32_t label_num = 0;
if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) {
GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id);
return INTERNAL_ERROR;
}
GELOGI("Label_num = %u", label_num);
GELOGD("Label_num = %u", label_num);

Status ret = CreateRtModelResources(stream_num, event_num, label_num);
if (ret != SUCCESS) {
@@ -198,11 +198,11 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra

void RunContextUtil::PrintMemInfo() {
for (auto iter : mem_type_to_data_mem_base_) {
GELOGI("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second);
GELOGD("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second);
}

for (auto iter : mem_type_to_data_mem_size_) {
GELOGI("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second);
GELOGD("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second);
}
}



+ 4
- 9
ge/graph/build/stream_allocator.cc View File

@@ -67,11 +67,10 @@ StreamAllocator::StreamAllocator(ComputeGraphPtr whole_graph, const Graph2SubGra
}

enable_single_stream_ = (single_stream_str == kTrueStr) ? true : false;
GELOGI("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr);
GELOGD("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr);
}

Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &max_parallel_num, bool hcom_parallel) {
GELOGI("Assign logical streams start.");
GE_CHECK_NOTNULL(whole_graph_);
GE_DUMP(whole_graph_, "BeforeAssignedLogicalStreams");

@@ -92,15 +91,12 @@ Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &m
return status;
}
GE_DUMP(whole_graph_, "AfterAssignedLogicalStreams");
GELOGI("Assign logical streams success.");

return SUCCESS;
}

// After allocating the logical stream in the graph, refresh the stream in the
// graph and insert the synchronization node.
Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_num) {
GELOGI("RefreshRealStream start.");
GE_CHECK_NOTNULL(whole_graph_);
GE_DUMP(whole_graph_, "BeforeRefreshRealStream");

@@ -174,8 +170,7 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu
GELOGI("None of nodes need to assign stream, stream num is 0, it will cause error, so change it to 1");
stream_num_ = 1;
}
GELOGI("stream num: %ld, event num: %u.", stream_num_, event_num_);
GELOGI("RefreshRealStream successfully.");
GELOGD("stream num: %ld, event num: %u.", stream_num_, event_num_);

stream_num = stream_num_;
event_num = static_cast<int64_t>(event_num_);
@@ -1241,7 +1236,7 @@ void StreamAllocator::DumpEvents() {

for (const auto &one_pair : after_refresh_stream_nodes) {
int64_t stream_id = one_pair.first;
GELOGI("After RefreshRealStream: stream %ld.", stream_id);
GELOGD("After RefreshRealStream: stream %ld.", stream_id);

for (const auto &node : one_pair.second) {
string send_event_str;
@@ -1273,7 +1268,7 @@ Status StreamAllocator::GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stre
GELOGE(FAILED, "Get max stream and task count by rts failed.");
return FAILED;
}
GELOGI("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count);
GELOGD("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count);

return SUCCESS;
}


+ 4
- 6
ge/graph/build/stream_graph_optimizer.cc View File

@@ -30,7 +30,7 @@ StreamGraphOptimizer::~StreamGraphOptimizer() {}

void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map) {
size_t node_size = comp_graph->GetAllNodesSize();
GELOGI("Refresh placeholder and end nodeId start from node num: %zu", node_size);
GELOGD("Refresh placeholder and end nodeId start from node num: %zu", node_size);
for (const auto &subgraph_pair : subgraph_map) {
for (const auto &subgraph_info : subgraph_pair.second) {
ComputeGraphPtr subgraph = subgraph_info->GetSubGraph();
@@ -74,8 +74,6 @@ bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) {
Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp_graph,
Graph2SubGraphInfoList &subgraph_map,
struct RunContext &run_context) {
GELOGI("Optimize streamed subgraph start.");

RefreshNodeId(comp_graph, subgraph_map);

std::shared_ptr<GELib> instance = ge::GELib::GetInstance();
@@ -86,7 +84,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
ComputeGraphPtr subgraph = subgraph_info->GetSubGraph();
GE_CHECK_NOTNULL(subgraph);

GELOGI("Optimize subgraph %s", subgraph->GetName().c_str());
GELOGD("Optimize subgraph %s", subgraph->GetName().c_str());

std::string engine_name = subgraph_info->GetEngineName();

@@ -128,7 +126,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret);
return ret;
}
GELOGI(
GELOGD(
"[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph "
"Optimizer num: %zu!",
subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size());
@@ -137,7 +135,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
}
}

GELOGI("Optimize streamed subgraph success.");
GELOGD("Optimize streamed subgraph success.");
return SUCCESS;
}
} // namespace ge

+ 5
- 6
ge/graph/build/task_generator.cc View File

@@ -68,7 +68,7 @@ TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) {
TaskGenerator::~TaskGenerator() {}

Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context) {
GELOGI("Begin to Get TaskInfo. session_id=%lu", session_id);
GELOGD("Begin to Get TaskInfo. session_id=%lu", session_id);
// Check params
if (graph == nullptr) {
GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id);
@@ -120,7 +120,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t
return ret;
}

GELOGI("Get TaskInfo success. session_id=%lu", session_id);
GELOGD("Get TaskInfo success. session_id=%lu", session_id);
return SUCCESS;
}

@@ -232,7 +232,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion
}
}
}
GELOGI("Fusion: get fusion group numbers [%zu].", fusion_nodes.size());
GELOGD("Fusion: get fusion group numbers [%zu].", fusion_nodes.size());
return SUCCESS;
}

@@ -575,7 +575,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continuous_op_lists.back().emplace_back(op_desc);
}
}
GELOGI("Number of continuous node lists is %zu.", continuous_op_lists.size());
GELOGD("Number of continuous node lists is %zu.", continuous_op_lists.size());

for (const auto &continuous_ops : continuous_op_lists) {
map<string, std::pair<OpDescPtr, OpDescPtr>> first_and_last_ops;
@@ -846,13 +846,12 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint

Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point,
vector<uint32_t> &all_reduce_nodes) const {
GELOGI("Start FindProfilingTaskIndex.");
GE_CHECK_NOTNULL(graph);
const char *profiling_mode = std::getenv(kProfilingMode);
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() ||
ProfilingManager::Instance().ProfilingTrainingTraceOn();
if (!is_profiling) {
GELOGW("Profiling is not open.");
GELOGD("Profiling is not open.");
return SUCCESS;
}



+ 1
- 1
ge/graph/execute/graph_execute.cc View File

@@ -115,7 +115,7 @@ Status GraphExecutor::FreeInOutBuffer() {
malloc_flag_ = false;
return SUCCESS;
} else {
GELOGI("[GraphManager] not malloc buffer.");
GELOGD("[GraphManager] not malloc buffer.");
return SUCCESS;
}
}


+ 1
- 1
ge/graph/load/graph_loader.cc View File

@@ -286,7 +286,7 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn
return ret;
}

GELOGI("Execute model success, model_id:%u.", model_id);
GELOGD("Execute model success, model_id:%u.", model_id);
return SUCCESS;
}



+ 1
- 1
ge/graph/load/new_model_manager/cpu_queue_schedule.cc View File

@@ -131,7 +131,7 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
for (const auto &virtual_args_addr : virtual_args_addrs) {
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index));
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
}
}
index++;


+ 3
- 15
ge/graph/load/new_model_manager/data_dumper.cc View File

@@ -159,7 +159,6 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_
}

void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
GELOGI("Start to save data %s message", node->GetName().c_str());
if (node != nullptr) {
auto input_op_desc = node->GetOpDesc();
if (input_op_desc == nullptr) {
@@ -180,7 +179,6 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
{op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
}
}
GELOGI("Save data message successfully");
}
}

@@ -218,7 +216,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr
GELOGW("Get input size failed");
return;
}
GELOGI("Save dump op info, the input size is %ld", input_size);
GELOGD("Save dump op info, the input size is %ld", input_size);
op_desc_info.input_size.emplace_back(input_size);
}
for (size_t j = 0; j < op->GetOutputsSize(); ++j) {
@@ -234,7 +232,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr
GELOGW("Get input size failed");
return;
}
GELOGI("Save dump op info, the output size is %ld", output_size);
GELOGD("Save dump op info, the output size is %ld", output_size);
op_desc_info.output_size.emplace_back(output_size);
}
op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op);
@@ -301,22 +299,16 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin
if (step_id != 0) {
GELOGI("step_id exists.");
op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
} else {
GELOGI("step_id is null.");
}

if (loop_per_iter != 0) {
GELOGI("loop_per_iter exists.");
op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
} else {
GELOGI("loop_per_iter is null.");
}

if (loop_cond != 0) {
GELOGI("loop_cond exists.");
op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
} else {
GELOGI("loop_cond is null.");
}
}

@@ -672,7 +664,7 @@ Status DataDumper::LoadDumpInfo() {
PrintCheckLog(dump_list_key);

if (op_list_.empty()) {
GELOGW("op_list_ is empty");
GELOGD("op_list_ is empty");
}

aicpu::dump::OpMappingInfo op_mapping_info;
@@ -684,8 +676,6 @@ Status DataDumper::LoadDumpInfo() {
op_mapping_info.set_flag(kAicpuLoadFlag);
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep());
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info",
dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str());
auto ret = BuildTaskInfo(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(ret, "Build task info failed");
@@ -812,7 +802,6 @@ void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void

Status DataDumper::UnloadDumpInfo() {
if (!load_flag_) {
GELOGI("No need to UnloadDumpInfo.");
load_flag_ = false;
return SUCCESS;
}
@@ -838,7 +827,6 @@ Status DataDumper::UnloadDumpInfo() {
void DataDumper::PrintCheckLog(string &dump_list_key) {
std::set<std::string> model_list = dump_properties_.GetAllDumpModel();
if (model_list.empty()) {
GELOGI("No model need dump.");
return;
}



+ 79
- 87
ge/graph/load/new_model_manager/davinci_model.cc View File

@@ -17,11 +17,7 @@
#include "graph/load/new_model_manager/davinci_model.h"

#include <cce/dnn.h>
#include <dlfcn.h>
#include <graph/utils/node_utils.h>
#include <pthread.h>
#include <sched.h>
#include <sys/prctl.h>
#include <algorithm>
#include <map>
#include <utility>
@@ -206,7 +202,6 @@ DavinciModel::~DavinciModel() {

OpDebugUnRegister();

GELOGI("do ReleaseTask");
ReleaseTask();
CleanTbeHandle();

@@ -337,7 +332,6 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
weights_mem_base_, weights_size);
GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
GELOGI("copy weights data to device");
}

GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
@@ -488,7 +482,7 @@ Status DavinciModel::SetTSDevice() {
int64_t value = 0;
bool ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_CORE_TYPE, value);
uint32_t core_type = ret ? static_cast<uint32_t>(value) : 0;
GELOGI("SetTSDevice: %u", core_type);
GELOGD("SetTSDevice: %u", core_type);
rtError_t rt_ret = rtSetTSDevice(core_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret);
@@ -543,7 +537,6 @@ Status DavinciModel::OpDebugRegister() {
}

void DavinciModel::OpDebugUnRegister() {
GELOGI("OpDebugUnRegister, is_op_debug_reg_ = %d", is_op_debug_reg_);
if (is_op_debug_reg_) {
debug_reg_mutex_.unlock();
rtError_t rt_ret = RT_ERROR_NONE;
@@ -648,7 +641,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed.");

GE_TIMESTAMP_START(InitModelMem);
GELOGI("Known node is %d", known_node_);
GELOGD("Known node is %d", known_node_);
if (!known_node_) {
GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
data_inputer_ = new (std::nothrow) DataInputer();
@@ -708,7 +701,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
}

Shrink();
GELOGI("Davinci model init success.");
return ret;
}

@@ -968,18 +960,21 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc);
const vector<void *> virtual_addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc);
const vector<int64_t> output_offset_list = op_desc->GetOutputOffset();
if (output_offset_list.size() != virtual_addr_list.size()) {
GELOGE(PARAM_INVALID, "virtual_addr size:%zu should be equal to offset size:%zu.", virtual_addr_list.size(),
output_offset_list.size());
if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size()) ||
(output_offset_list.size() != virtual_addr_list.size())) {
GELOGE(PARAM_INVALID, "Data[%s] init failed: output size is %zu, virtual_addr size is %zu, offset size is %zu.",
op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size());
return PARAM_INVALID;
}
auto data_index = data_op_index;
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) {
GELOGI("ge_train: get new index %u, old %u", data_index, data_op_index);
GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index);
}
bool fusion_flag = false;
ZeroCopyOffset zero_copy_offset;
Status ret = zero_copy_offset.InitInputDataInfo(output_size_list, virtual_addr_list, op_desc, fusion_flag);
int64_t data_size = output_size_list[kDataIndex];
void *virtual_addr = virtual_addr_list[kDataIndex];
Status ret = zero_copy_offset.InitInputDataInfo(data_size, virtual_addr, op_desc, fusion_flag);
if (ret != SUCCESS) {
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str());
return PARAM_INVALID;
@@ -996,7 +991,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma
new_input_outside_addrs_[addr] = zero_copy_offset;
}

GELOGI("SetInputOutsideAddr success.");
data_op_index++;
if (InitInputZeroCopyNodes(node) != SUCCESS) {
GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!");
@@ -1131,7 +1125,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
DisableZeroCopy(real_addr);
real_virtual_addrs_.insert(real_addr);
}
GELOGI("SetOutputOutsideAddr success.");
}

GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS,
@@ -1147,8 +1140,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
}

GELOGI("DavinciModel::InitNetoutput success.");
return SUCCESS;
}

@@ -1459,7 +1450,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) {
return INTERNAL_ERROR;
}
if (label_index >= LabelNum()) {
GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %zu.", label_index, LabelNum());
GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %u.", label_index, LabelNum());
return INTERNAL_ERROR;
}
if (label_id_indication_.count(label_index) > 0) {
@@ -1968,7 +1959,6 @@ void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_i
if (op->GetType() != NETOUTPUT) {
continue;
}
GELOGI("Start to get dynamic output dims attr");
if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) {
GELOGD("Can not get dynamic output dims attr");
}
@@ -2124,7 +2114,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD
}

Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) {
GELOGI("Output node size: %zu", output_op_list_.size());
GELOGD("Output node size: %zu", output_op_list_.size());
for (size_t i = 0; i < output_op_list_.size(); i++) {
auto &op_desc = output_op_list_[i];
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
@@ -2187,7 +2177,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data
void *mem_addr = data.second.GetBasicAddr();
void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data));
uint64_t data_buf_length = data_buf.length;
GELOGI("[IMAS]CopyPlainData memcpy graph_%lu type[F] input[%lu] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]",
runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length);
GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind));
}
@@ -2235,8 +2225,6 @@ Status DavinciModel::SinkModelProfile() {
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter();
GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS);

GELOGI("Start collect model load profiling data.");

Msprof::Engine::ReporterData reporter_data{};
// report model data tag name
std::string tag_name;
@@ -2294,7 +2282,6 @@ Status DavinciModel::SinkModelProfile() {
uint32_t op_num = fusion_op_info->original_op_names.size();
uint32_t task_id = task->GetTaskID();
if (op_num > 0) {
GELOGI("task.id = %u, opNum = %u", task_id, op_num);
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id));
}
}
@@ -2552,21 +2539,23 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r
if (is_dynamic_) {
GELOGI("No need to check output data size.");
} else if (buffer.length < mem_size) {
GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length);
GELOGE(FAILED, "Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length);
return FAILED;
} else if (buffer.length > mem_size) {
GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length);
GELOGW("Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length);
}
int64_t data_size = output.second.GetDataSize();

if (is_online_infer_dynamic_) {
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx];
data_size = gear_and_real_out_size_info[cur_dynamic_dims_];
if (merge_nodes_gear_and_real_out_size_info_.find(idx) != merge_nodes_gear_and_real_out_size_info_.end()) {
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx];
data_size = gear_and_real_out_size_info[cur_dynamic_dims_];
}
}
uint64_t buffer_length = buffer.length;
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data));

GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%ld] datasize[%u]",
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]",
runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length);
GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind));
idx++;
@@ -2598,11 +2587,13 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data
return ret);
std::vector<int64_t> output_shape = input_desc->GetShape().GetDims();
if (is_online_infer_dynamic_) {
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i];
size = gear_and_real_out_size_info[cur_dynamic_dims_];
auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i];
output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_];
is_dynamic_ = true;
if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) {
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i];
size = gear_and_real_out_size_info[cur_dynamic_dims_];
auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i];
output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_];
is_dynamic_ = true;
}
}
GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str());
out_buffer_size_vec.push_back(size);
@@ -2759,16 +2750,6 @@ void *DavinciModel::Run(DavinciModel *model) {

InputData current_data = data_wrapper->GetInput();
GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id, current_data.index);
if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) {
model->cur_dynamic_dims_.clear();
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break);
auto shape_data_buffer_data = current_data.blobs.back().data;
auto shape_data_buffer_length = current_data.blobs.back().length;
model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data),
reinterpret_cast<int64_t *>(shape_data_buffer_data) +
shape_data_buffer_length / sizeof(int64_t));
GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
}
GE_TIMESTAMP_START(Model_SyncVarData);
ret = model->SyncVarData();
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
@@ -2785,6 +2766,18 @@ void *DavinciModel::Run(DavinciModel *model) {
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput());
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
continue, "Copy input data to model failed."); // [No need to check value]
if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) {
model->cur_dynamic_dims_.clear();
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break);
auto shape_data_buffer_data = current_data.blobs.back().data;
auto shape_data_buffer_length = current_data.blobs.back().length;
model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data),
reinterpret_cast<int64_t *>(shape_data_buffer_data) +
shape_data_buffer_length / sizeof(int64_t));
GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
delete[] (int64_t *)current_data.blobs.back().data;
current_data.blobs.pop_back();
}
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START));
if (ProfilingManager::Instance().ProfilingOpTraceOn()) {
@@ -2982,7 +2975,7 @@ void DavinciModel::UnbindTaskSinkStream() {
Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) {
GELOGI("DavinciModel::CreateKnownZeroCopyMap in.");
if (inputs.size() > data_op_list_.size()) {
GELOGE(FAILED, "input data addr %u should less than input op number %u.", inputs.size(), data_op_list_.size());
GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size());
return FAILED;
}
// remove zero copy addr in last iteration
@@ -2991,16 +2984,16 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const
for (size_t i = 0; i < inputs.size(); ++i) {
const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]);
knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i];
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %d,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]);
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]);
}
if (output_op_list_.size() < kOutputNum) {
GELOGW("output op num in graph is %u.", output_op_list_.size());
GELOGW("output op num in graph is %zu.", output_op_list_.size());
return SUCCESS;
}
const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]);
for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) {
knonw_output_data_info_[addr_list[i]] = outputs[i];
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]);
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]);
}
GELOGI("DavinciModel::CreateKnownZeroCopyMap success.");
return SUCCESS;
@@ -3010,13 +3003,13 @@ Status DavinciModel::UpdateKnownZeroCopyAddr() {
for (size_t i = 0; i < total_io_addrs_.size(); ++i) {
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]);
if (it_in != knonw_input_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %d,v addr %p,p addr %p .", i, total_io_addrs_[i],
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_input_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]);
}
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]);
if (it_out != knonw_output_data_info_.end()) {
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %d,v addr %p,p addr %p .", i, total_io_addrs_[i],
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i],
knonw_output_data_info_.at(total_io_addrs_[i]));
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]);
}
@@ -3037,7 +3030,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
if (task != nullptr) {
Status ret = task->UpdateArgs();
if (ret != SUCCESS) {
GELOGE(FAILED, "task %d created by davinci model is nullptr.", task_index);
GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index);
return FAILED;
}
}
@@ -3066,7 +3059,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
}

Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
GELOGI("InitTaskInfo in, task size %zu", model_task_def.task().size());
GELOGI("InitTaskInfo in, task size %d", model_task_def.task().size());
task_list_.resize(model_task_def.task_size());
for (int i = 0; i < model_task_def.task_size(); ++i) {
// dynamic shape will create task_list_ before
@@ -3142,14 +3135,14 @@ Status DavinciModel::DistributeTask() {

task_desc_info_.clear();
bool flag = GetL1FusionEnableOption();
char *skt_enable_env = std::getenv("SKT_ENABLE");
int64_t env_flag = (skt_enable_env != nullptr) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0;
if (env_flag != 0) {
flag = true;
}

const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
GELOGI("there are %zu task need to save.", task_list_.size());
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
auto &task = task_list_.at(task_index);
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
@@ -3331,7 +3324,7 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64

if (input_size > op_size) {
GELOGW(
"Input size [%u] is bigger than om size need [%u], "
"Input size [%ld] is bigger than om size need [%ld], "
"MAY cause inference result ERROR, please check model input",
input_size, op_size);
}
@@ -3413,7 +3406,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &

for (const auto &data : data_info) {
if (data.first >= blobs.size()) { // check data index.
GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu",
GELOGE(FAILED, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu",
input_or_output.c_str(), data.first, blobs.size());
return FAILED;
}
@@ -3522,7 +3515,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) {

GeTensor *tensor = const_cast<GeTensor *>(v_weights[0].get());
GE_IF_BOOL_EXEC(static_cast<size_t>(v_output_size[0]) < tensor->GetData().size(),
GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0],
GELOGE(PARAM_INVALID, "output size:%ld less than weight data size:%zu", v_output_size[0],
tensor->GetData().size());
return PARAM_INVALID;);

@@ -3546,12 +3539,12 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) {
uint64_t offset = static_cast<uint64_t>(elem_num * kBytes);

uint64_t hbm_raw_data_base_addr =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset;
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset;
for (int64_t i = elem_num - 1; i >= 0; --i) {
buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]);
}
}
GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%u] datasize[%zu]",
GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%lu] datasize[%zu]",
runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr[0], v_output_size[0],
tensor->GetData().size());
GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], tensor->GetData().data(), tensor->GetData().size(),
@@ -3582,12 +3575,12 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {
if (rtQueryFunctionRegistered(bin_file_key) != RT_ERROR_NONE) {
void *bin_handle = nullptr;
if (!kernel_store.FindTBEHandle(bin_file_key, bin_handle)) {
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key);
GELOGD("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key);

rtDevBinary_t binary;
std::string json_string;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string),
GELOGI("Get original type of session_graph_id."));
GELOGD("Get original type of session_graph_id."));
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
@@ -3603,13 +3596,13 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {
binary.data = tbe_kernel->GetBinData();
binary.length = tbe_kernel->GetBinDataSize();

GELOGI("TBE: binary.length: %lu", binary.length);
GELOGD("TBE: binary.length: %lu", binary.length);
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle));

std::string meta_data;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data),
GELOGI("Get original type of json_string"));
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GELOGD("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));

kernel_store.StoreTBEHandle(bin_file_key, bin_handle, tbe_kernel);
@@ -3620,8 +3613,7 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) {

std::string kernel_name;
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name),
GELOGI("Get original type of kernel_name"));
GELOGI("TBE: binfile_key=%s, kernel_name=%s", bin_file_key, kernel_name.c_str());
GELOGD("Get original type of kernel_name"));
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0));
used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1.
return SUCCESS;
@@ -3816,7 +3808,7 @@ Status DavinciModel::InitModelStream(rtStream_t stream) {
Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data,
OutputData &output_data) {
is_async_mode_ = async_mode;
GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_);
GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_);
GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
is_dynamic_ = input_data.is_dynamic_batch;
if (!is_dynamic_) {
@@ -3828,7 +3820,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u",
model_id_);

GELOGI("current_data.index=%u", input_data.index);
GELOGD("current_data.index=%u", input_data.index);
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));

if (!task_list_.empty()) {
@@ -3837,7 +3829,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
GELOGI("rtModelExecute end");
GELOGD("rtModelExecute end");
}

if (!is_async_mode_) {
@@ -3849,7 +3841,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa

// report model time data
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data));
GELOGI("Model run end, model id:%u", model_id_);
GELOGD("Model run end, model id:%u", model_id_);
return SUCCESS;
}

@@ -3906,7 +3898,9 @@ Status DavinciModel::InitEntryTask() {
uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) {
uint8_t *mem_base = nullptr;
const string purpose("feature map,used for op input and output.");
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize());
string memory_key = std::to_string(0) + "_f";
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId());
@@ -3936,7 +3930,9 @@ uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) {
uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) {
uint8_t *weights_mem_base = nullptr;
const string purpose("weights memory in inference network.");
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
string weight_memory_key = std::to_string(0) + "_w";
weights_mem_base =
MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId());
@@ -3947,7 +3943,9 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) {
}

void DavinciModel::FreeFeatureMapMem() {
if (std::getenv(kEnvGeuseStaticMemory) != nullptr && is_inner_mem_base_) {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK && is_inner_mem_base_) {
string weight_memory_key = std::to_string(0) + "_f";
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()),
@@ -3979,7 +3977,9 @@ void DavinciModel::FreeP2PMem() {
}

void DavinciModel::FreeWeightsMem() {
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) {
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH);
if (res == EN_OK) {
string memory_key = std::to_string(0) + "_w";
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) {
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()),
@@ -3995,7 +3995,6 @@ void DavinciModel::FreeWeightsMem() {
}

Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) {
GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id);
rtContext_t ctx = nullptr;
rtError_t rt_ret = rtCtxGetCurrent(&ctx);
if (rt_ret != RT_ERROR_NONE) {
@@ -4016,13 +4015,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id)

GE_CHK_STATUS_RET_NOLOG(
TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum));

GELOGI("TransAllVarData success.");
return SUCCESS;
}

void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) {
GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_);
data_dumper_.SetModelName(name_);
data_dumper_.SetModelId(model_id_);
data_dumper_.SetOmName(om_name_);
@@ -4048,15 +4044,13 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) {
}
return v_output_addr[0];
}
GELOGW("op is null.");
GELOGD("op is null.");
return nullptr;
};

data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_),
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_),
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_));

GELOGI("SetDataDumperArgs end.");
}

uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) {
@@ -4075,7 +4069,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea
}

Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) {
GELOGI("GetComputeGraphInfo start.");
auto &all_op_desc = data_dumper_.GetAllOpDescInfo();
for (auto &op_desc : all_op_desc) {
ComputeGraphDescInfo compute_graph_info;
@@ -4095,7 +4088,6 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des

graph_desc_info.emplace_back(compute_graph_info);
}
GELOGI("GetComputeGraphInfo end.");
return SUCCESS;
}

@@ -4160,7 +4152,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input

vector<std::string> inputs;
if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) {
GELOGI("GetAllAippInputOutputDims: Data: %s has %u related aippInfo.", data_op->GetName().c_str(), inputs.size());
GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size());
for (auto it : inputs) {
InputOutputDims input_info;
ParseAIPPInfo(it, input_info);
@@ -4171,7 +4163,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input
int64_t data_input_size;
(void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size);
GELOGD(
"GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: "
"GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: "
"%s, data_type: %s, shape: %s .",
index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size,
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(),


+ 19
- 14
ge/graph/load/new_model_manager/model_manager.cc View File

@@ -202,7 +202,6 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}

ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
GELOGI("Destroy aicpu session for infer, model id is %u.", model_id);
std::lock_guard<std::mutex> lock(map_mutex_);
auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
@@ -210,7 +209,6 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
return GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
GELOGI("Destroy aicpu session for infer, session id is %lu.", session_id);
DestroyAicpuSession(session_id);
return SUCCESS;
}
@@ -407,10 +405,6 @@ Status ModelManager::Unload(uint32_t model_id) {
}
std::lock_guard<std::mutex> lock(exeception_infos_mutex_);
exception_infos_.clear();
for (auto addr : shape_data_addrs_[model_id]) {
delete[] addr;
}
shape_data_addrs_.erase(model_id);
return SUCCESS;
}

@@ -475,6 +469,19 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_
}
}
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str());
bool cur_dynamic_dims_valid = false;
std::vector<std::string> shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';');
for (auto dynamic_dim : shape_strs) {
if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) {
cur_dynamic_dims_valid = true;
break;
}
}
if (!cur_dynamic_dims_valid) {
GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.",
formats::JoinToString(cur_dynamic_dims).c_str());
return INTERNAL_ERROR;
}
return SUCCESS;
}

@@ -517,7 +524,6 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT
"Failed to memcpy data.");
data.length = length;
input_data.blobs.push_back(data);
shape_data_addrs_[model_id].emplace_back(reinterpret_cast<int64_t *>(data.data));
}
}

@@ -1019,8 +1025,8 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp
Status ModelManager::GenSessionId(uint64_t &session_id) {
std::lock_guard<std::mutex> lock(session_id_create_mutex_);

struct timeval tv;
if (gettimeofday(&tv, nullptr) != 0) {
mmTimeval tv;
if (mmGetTimeOfDay(&tv, nullptr) != 0) {
GELOGE(INTERNAL_ERROR, "Failed to get current time.");
return INTERNAL_ERROR;
}
@@ -1037,8 +1043,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) {

Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener,
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0,
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID,
GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK,
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID,
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno));
GenModelId(&model_id);

@@ -1123,7 +1129,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data,
const std::vector<uint32_t> &input_queue_ids,
const std::vector<uint32_t> &output_queue_ids) {
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0,
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK,
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s",
model_data.key.c_str(), strerror(errno));

@@ -1205,7 +1211,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy

Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data);
if (status == SUCCESS) {
GELOGI("Execute model %u success.", model_id);
GELOGD("Execute model %u success.", model_id);
}

return status;
@@ -1262,7 +1268,6 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_
}

Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
GELOGI("LaunchCustAucpuSo in, kernel name %s", kernel_name.c_str());
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
if (cust_aicpu_so_.size() == 0) return SUCCESS;
// get current context


+ 0
- 2
ge/graph/load/new_model_manager/model_manager.h View File

@@ -18,7 +18,6 @@
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_

#include <model/ge_root_model.h>
#include <pthread.h>
#include <stdint.h>
#include <algorithm>
#include <map>
@@ -364,7 +363,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_;

static DumpProperties dump_properties_;
std::map<uint32_t, std::vector<int64_t *>> shape_data_addrs_;
};
} // namespace ge



+ 4
- 6
ge/graph/load/new_model_manager/model_utils.cc View File

@@ -337,9 +337,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
continue;
}

GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(),
GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index);
break);
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), break);

int64_t input_offset = v_input_offset[non_const_index];
non_const_index++;
@@ -356,7 +354,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
// feature maps
void *mem_addr = nullptr;
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset));
mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(input_offset));
v_input_data_addr.push_back(mem_addr);
} else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) {
int64_t tensor_size = 0;
@@ -424,7 +422,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
// feature maps
void *mem_addr = nullptr;
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i]));
mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_output_offset[i]));
v_output_data_addr.push_back(mem_addr);
} else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) {
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i);
@@ -500,7 +498,7 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param
continue;
}
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) {
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i])));
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_workspace_offset[i])));
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx",
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]);
} else if (v_workspace_bytes[i] == 0) {


+ 2
- 2
ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc View File

@@ -149,7 +149,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
return FAILED;
}

uint64_t workspace_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0]));
uint64_t workspace_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0]));
const vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
const vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@@ -287,7 +287,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const
}

if (workspace_data_sizes[0] < static_cast<int64_t>(kernel_def.task_info_size())) {
GELOGE(FAILED, "Node:%s workspace size is %zu, task info size is %zu.", op_desc->GetName().c_str(),
GELOGE(FAILED, "Node:%s workspace size is %ld, task info size is %d.", op_desc->GetName().c_str(),
workspace_data_sizes[0], kernel_def.task_info_size());
return FAILED;
}


+ 61
- 22
ge/graph/load/new_model_manager/task_info/kernel_task_info.cc View File

@@ -31,6 +31,7 @@
#include "runtime/kernel.h"
#include "super_kernel/super_kernel.h"
#include "super_kernel/super_kernel_factory.h"
#include "cce/aicpu_engine_struct.h"

namespace {
const uint8_t kL2LoadToDdr = 1;
@@ -73,7 +74,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_);
(void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_);
has_group_key_ = (group_key_ != kInvalidGroupKey);
GELOGD("node[%s] has_group_key_ %ld, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_);
GELOGD("node[%s] has_group_key_ %d, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_);

// fusion_op_info
vector<std::string> original_op_names;
bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names);
@@ -176,7 +178,7 @@ void KernelTaskInfo::UpdateTaskId() {
}
task_id_ = task_id;
stream_id_ = stream_id;
GELOGI("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id);
GELOGD("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id);
}
}

@@ -216,7 +218,7 @@ Status KernelTaskInfo::SuperKernelLaunch() {
rtError_t rt_ret;
auto &skt_kernel_list = skt_info_.kernel_list;
auto &skt_arg_list = skt_info_.arg_list;
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size());
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%zu] skt_arg_list[%zu]", skt_kernel_list.size(), skt_arg_list.size());
if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) {
rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim),
skt_info_.arg_list[0], skt_info_.last_args_size,
@@ -367,8 +369,9 @@ Status KernelTaskInfo::Distribute() {
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
char *skt_enable_env = getenv("SKT_ENABLE");
int64_t env_flag = (skt_enable_env != nullptr) ? strtol(skt_enable_env, nullptr, 10) : 0;
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH);
int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0;
bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_);
if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) {
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_);
@@ -379,7 +382,7 @@ Status KernelTaskInfo::Distribute() {
call_save_dump_ = true;
} else {
/* default: not skt launch */
GELOGI(
GELOGD(
"KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s "
"stubfunc:%p blockdim:%u stream:%p",
call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
@@ -406,7 +409,7 @@ Status KernelTaskInfo::Distribute() {
}
// set for task_id_
UpdateTaskId();
GELOGI(
GELOGD(
"KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p "
"blockdim:%d stream:%p",
call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_);
@@ -747,15 +750,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel
}
}
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4

rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
@@ -913,7 +916,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);

aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size());
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());

// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
@@ -956,12 +959,40 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) {
if (ext_info.empty()) {
return SUCCESS;
}

std::unique_ptr<uint8_t[]> copy_ext_info;
copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]);
GE_CHECK_NOTNULL(copy_ext_info);
auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size());
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}

auto ext_info_data = copy_ext_info.get();
size_t offset = 0;
while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) {
auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset);
GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) {
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(SessionInfo), PARAM_INVALID,
"Parse ext session info failed as infoLen must be %zu but %u.",
sizeof(SessionInfo), aicpu_ext_info->infoLen);
SessionInfo *session_info = reinterpret_cast<SessionInfo *>(aicpu_ext_info->infoMsg);
session_info->sessionId = davinci_model_->GetSessionId();
session_info->sessFlag = true;
GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId);
}
offset += sizeof(aicpu::FWKAdapter::ExtInfo);
offset += aicpu_ext_info->infoLen;
}

auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size());
return RT_ERROR_TO_GE_STATUS(rt_ret);
@@ -1122,18 +1153,24 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
}

GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str());
auto handle = dlopen(canonicalPath.c_str(), RTLD_NOW | RTLD_GLOBAL);
auto handle = mmDlopen(canonicalPath.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
const char *error = "";
if (handle == nullptr) {
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror());
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error);
return FAILED;
}
cce::ccStatus_t cc_ret;
std::string update_kernel_args = "ccUpdateKernelArgs";
auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t,
void *))dlsym(handle, "ccUpdateKernelArgs");
void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str()));
if (cceUpdateKernelArgs == nullptr) {
GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs");
if (dlclose(handle) != 0) {
GELOGW("Failed to close handle %s", dlerror());
if (mmDlclose(handle) != 0) {
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle %s", error);
}
return FAILED;
} else {
@@ -1146,8 +1183,10 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u
const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl);
}
}
if (dlclose(handle) != 0) {
GELOGW("Failed to close handle %s", dlerror());
if (mmDlclose(handle) != 0) {
error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("Failed to close handle %s", error);
return FAILED;
}
if (cc_ret != cce::CC_STATUS_SUCCESS) {
@@ -1188,7 +1227,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe

*(reinterpret_cast<uint64_t *>(
args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) =
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_));
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_));
}
return SUCCESS;
}


+ 1
- 1
ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc View File

@@ -23,7 +23,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) {
const void *func_stub_ = this->GetFuncStub();

const void *args[] = {this->GetNavTablePtr(),
reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(this->GetNavTableSize()))};
reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))};

rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return


+ 8
- 6
ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc View File

@@ -27,7 +27,7 @@ SuperKernelFactory &SuperKernelFactory::GetInstance() {
Status SuperKernelFactory::Init() {
if (!is_init_) {
std::string skt_bin = "libcce_aicore.so";
handle_ = dlopen(skt_bin.c_str(), RTLD_NOW | RTLD_GLOBAL);
handle_ = mmDlopen(skt_bin.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
if (handle_ == nullptr) {
GELOGE(FAILED, "SKT: open skt lib failed, please check LD_LIBRARY_PATH.");
}
@@ -85,8 +85,10 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
"equal to 2");
return FAILED;
}
GELOGI("SKT: superkernel start fuse, superkernel size %d.", stub_func_list.size());
uint64_t nav_table[2 * stub_func_list.size()];
GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size());
const size_t nav_table_len = 2 * stub_func_list.size();
std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]);
GE_CHECK_NOTNULL(nav_table);
uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t);

rtError_t rt_ret;
@@ -99,16 +101,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func);
// store two uint64_t address
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4;
nav_table[i * 2] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4;
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]);
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
}
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);)
rt_ret =
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret);
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)
// Create the necessary metadata for the super kernel


+ 4
- 2
ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.h View File

@@ -34,8 +34,10 @@ class SuperKernelFactory {
~SuperKernelFactory() {
if (handle_ != nullptr) {
GELOGI("SKT: SKT LIB PATH release.");
if (dlclose(handle_) != 0) {
GELOGW("failed to close handle, message: %s", dlerror());
if (mmDlclose(handle_) != 0) {
const char *error = mmDlerror();
GE_IF_BOOL_EXEC(error == nullptr, error = "");
GELOGW("failed to close handle, message: %s", error);
}
}
};


+ 13
- 31
ge/graph/load/new_model_manager/zero_copy_offset.cc View File

@@ -30,49 +30,37 @@ ZeroCopyOffset::ZeroCopyOffset() {}

ZeroCopyOffset::~ZeroCopyOffset() {}

Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list,
const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc,
Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc,
bool &fusion_flag) {
GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p",
op_desc->GetName().c_str(), output_size_list[kDataIndex], virtual_addr_list[kDataIndex]);
if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size())) {
GELOGE(PARAM_INVALID, "Data[%s] init failed: Output size is %zu, Output addr is %zu", op_desc->GetName().c_str(),
output_size_list.size(), virtual_addr_list.size());
return PARAM_INVALID;
}

basic_addr_ = virtual_addr_list[kDataIndex];
op_desc->GetName().c_str(), output_size, virtual_addr);
basic_addr_ = virtual_addr;
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_);
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_);
GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID,
"basic_offset_size should be equal to relative_offset_size");
GELOGI("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size());
GELOGD("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size());

int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex);
GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset);
IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag);

uint32_t out_count = 0;
data_size_ = output_size_list[kDataIndex];
data_size_ = output_size;
if (!fusion_flag) {
GELOGI("[ZCPY] %s not set l2_fusion.", op_desc->GetName().c_str());
out_count++;
data_info_.emplace_back(output_size_list[kDataIndex], virtual_addr_list[kDataIndex]);
data_info_.emplace_back(output_size, virtual_addr);
relative_offset_.emplace_back(0);
GELOGI("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size_list[kDataIndex],
virtual_addr_list[kDataIndex]);
GELOGD("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size, virtual_addr);
} else {
GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str());
for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) {
if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) {
out_count++;
uint64_t out_offset =
reinterpret_cast<uint64_t>(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index);
int64_t real_data_size = ModelUtils::GetOutputSize(op_desc).at(kDataIndex);
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_offset)));
uint64_t out_offset = reinterpret_cast<uint64_t>(virtual_addr) + zero_copy_relative_offset_.at(index);
data_info_.emplace_back(output_size, reinterpret_cast<void *>(static_cast<uintptr_t>(out_offset)));
relative_offset_.emplace_back(zero_copy_relative_offset_.at(index));
GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %lu, need copy data_size is %ld.", basic_addr_,
out_offset, real_data_size);
out_offset, output_size);
}
}
}
@@ -83,7 +71,6 @@ Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list
Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list,
const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc,
const size_t &idx, bool &fusion_flag) {
GELOGI("[ZCPY] Start to InitOutputDataInfo of %s.", op_desc->GetName().c_str());
int64_t size = input_size_list[idx];
auto tensor_desc = op_desc->GetInputDescPtr(idx);
GE_CHECK_NOTNULL(tensor_desc);
@@ -92,7 +79,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list
return FAILED;
}

GELOGI("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size);
GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size);

basic_addr_ = virtual_addr_list[idx];
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_);
@@ -100,13 +87,11 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list
GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID,
"basic_offset_size should be equal to relative_offset_size");
int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx);
GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset);
IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag);

uint32_t in_count = 0;
data_size_ = size;
if (!fusion_flag) {
GELOGI("[ZCPY] %s not set l2-fusion.", op_desc->GetName().c_str());
in_count++;
data_info_.emplace_back(size, virtual_addr_list[idx]);
// op_desc not set l2fusion when fusion_flag is false
@@ -119,7 +104,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list
in_count++;
uint64_t in_offset = reinterpret_cast<uint64_t>(virtual_addr_list[idx]) + zero_copy_relative_offset_.at(index);
int64_t real_data_size = ModelUtils::GetInputSize(op_desc).at(idx);
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(in_offset)));
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(static_cast<uintptr_t>(in_offset)));
relative_offset_.emplace_back(zero_copy_relative_offset_.at(index));
GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %lu, need copy data_size is %ld.", basic_addr_,
in_offset, real_data_size);
@@ -142,10 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const

void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs) {
GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr %p.", addr);
uint32_t out_count = 0;
if (!fusion_flag) {
GELOGI("[ZCPY] not set l2-fusion for virtual_adr %p.", addr);
out_count++;
std::map<const void *, std::vector<void *>> addr_mapping;
addr_mapping[addr] = {};
@@ -175,7 +158,6 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr %p.", addr);
uint32_t out_count = 0;
if (!fusion_flag) {
GELOGI("[ZCPY] not set l2-fusion for virtual_addr %p.", addr);
out_count++;
std::map<const void *, std::vector<void *>> addr_mapping;
addr_mapping[addr] = {};
@@ -209,7 +191,7 @@ bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *ou
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
void *args_val = static_cast<uint8_t *>(args) + offset;
args_addrs->second.push_back(args_val);
GELOGI("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
args, offset);
set_batch_label_flag = true;
}


+ 1
- 2
ge/graph/load/new_model_manager/zero_copy_offset.h View File

@@ -42,8 +42,7 @@ class ZeroCopyOffset {
ZeroCopyOffset();
~ZeroCopyOffset();

Status InitInputDataInfo(const vector<int64_t> &output_size_list, const vector<void *> &virtual_addr_list,
const OpDescPtr &op_desc, bool &fusion_flag);
Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index,
bool fusion_flag, std::set<const void *> &real_virtual_addrs);



+ 4
- 3
ge/graph/load/new_model_manager/zero_copy_task.cc View File

@@ -19,6 +19,7 @@
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "common/ge_compiler_options.h"

namespace ge {
const char *const kDefaultBatchLable = "Batch_default";
@@ -48,7 +49,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) {
it->second.insert(offset);
}

GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr,
GELOGD("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr,
args_addr_, args_size_, offset);
return SUCCESS;
}
@@ -157,7 +158,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) {
rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX,
stream);
} else {
__builtin_prefetch(args_addr_);
GE_BUILTIN_PREFETCH(args_addr_);
rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE);
}

@@ -166,7 +167,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) {
return RT_ERROR_TO_GE_STATUS(rt_err);
}

GELOGI("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(),
GELOGD("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(),
args_addr_, args_size_, args_info_.data(), args_info_.size());
return SUCCESS;
}


+ 23
- 36
ge/graph/manager/graph_manager.cc View File

@@ -363,7 +363,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
for (auto &subgraph : compute_graph->GetAllSubgraphs()) {
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]");
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
}

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
@@ -396,8 +396,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
stages.builder.SetOptions(options_);

var_acc_ctrl_.AddGraph(graph_id, compute_graph);

GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id);
return SUCCESS;
}

@@ -435,7 +433,7 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
for (auto &subgraph : new_compute_graph->GetAllSubgraphs()) {
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id);
}
GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]");
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]");
}

GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id);
@@ -468,8 +466,6 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
stages.builder.SetOptions(options_);

var_acc_ctrl_.AddGraph(graph_id, new_compute_graph);

GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id);
return SUCCESS;
}

@@ -546,7 +542,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
const auto &root_subgraph_list = sub_graph_map[compute_graph];
std::string op_compile_strategy;
(void)AttrUtils::GetStr(compute_graph, ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
GELOGI("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str());
GELOGD("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str());
for (const auto &subgraph : root_subgraph_list) {
if (!op_compile_strategy.empty()) {
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy);
@@ -576,7 +572,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr
vector_future.emplace_back(std::move(f));
}
}
GELOGI("All sub graph num is %zu", vector_future.size());
GELOGD("All sub graph num is %zu", vector_future.size());
for (size_t i = 0; i < vector_future.size(); ++i) {
Status ret_status = vector_future[i].get();
if (ret_status != SUCCESS) {
@@ -700,7 +696,7 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr
/// Multiply optimize subgraph:
/// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize";
/// 2. run lx fusion or buffer according build_mode and build_step in fe.
GELOGI("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.",
GELOGD("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.",
options_.build_mode.c_str(),
options_.build_step.c_str(),
buffer_optimize.c_str());
@@ -747,7 +743,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node,
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph));

GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed.");
GELOGI("PreRun:PreRunOptimizeOriginalGraph success.");
GELOGD("PreRun:PreRunOptimizeOriginalGraph success.");
return SUCCESS;
}

@@ -762,10 +758,10 @@ Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node,
if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) {
std::string tuning_path;
(void) GetContext().GetOption(TUNING_PATH, tuning_path);
GELOGI("Dump path:%s.", tuning_path.c_str());
GELOGD("Dump path:%s.", tuning_path.c_str());
GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path);
}
GELOGI("PreRun:PreRunOptimizeSubGraph success.");
GELOGD("PreRun:PreRunOptimizeSubGraph success.");
return SUCCESS;
}

@@ -785,12 +781,12 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
}

GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id);
GELOGI("PreRun:PreRunAfterOptimizeSubGraph success.");
GELOGD("PreRun:PreRunAfterOptimizeSubGraph success.");
return SUCCESS;
}

Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) {
GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id,
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id,
static_cast<int>(mode), ge::GetContext().DeviceId());

rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId());
@@ -1251,7 +1247,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const

Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) {
GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id);
if (inputs.empty()) {
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs");
}
@@ -1531,7 +1527,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti

// Set save_original_model flag (ge.save_original_model)
ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model);
GELOGI("Set save original model flag %s", options_.save_original_model.c_str());
// Original model file name
ParseOption(options, ORIGINAL_MODEL_FILE, options_.original_model_file);

@@ -1540,16 +1535,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
ParseOption(options, DYNAMIC_NODE_TYPE, options_.dynamic_node_type);
GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d.",
options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type);
if ((!options_.input_shape.empty() && options_.dynamic_dims.empty()) ||
(options_.input_shape.empty() && !options_.dynamic_dims.empty())) {
GELOGE(GRAPH_PARAM_INVALID, "Should set input shape and dynamic dims at the same time");
return GRAPH_PARAM_INVALID;
}
if ((!options_.input_shape.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType) ||
(!options_.dynamic_dims.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType)) {
GELOGE(GRAPH_PARAM_INVALID, "Should set valid dynamic node type");
return GRAPH_PARAM_INVALID;
}

// Set Build model and step
ParseOption(options, BUILD_MODE, options_.build_mode);
@@ -2252,7 +2237,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
}

Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
GELOGI("Start optimize after merge sub graph.");
GELOGD("Start optimize after merge sub graph.");

PassManager after_merge_passes;
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass",
@@ -2502,7 +2487,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager

ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph();
const std::string &engine_name = sub_graph_info_ptr->GetEngineName();
GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu",
GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu",
compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(),
pthread_self());
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore");
@@ -2514,11 +2499,11 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager
GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str());
return ret;
} else {
GELOGI("SubGraph optimize success %s", engine_name.c_str());
GELOGD("SubGraph optimize success %s", engine_name.c_str());
}
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphAfter");
sub_graph_info_ptr->SetSubGraph(compute_graph_tmp);
GELOGI("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu",
GELOGD("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu",
compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(),
pthread_self());
} else {
@@ -2849,13 +2834,15 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
if (args.graph_node->graph_run_async_listener_ != nullptr) {
args.graph_node->graph_run_async_listener_->SetCallback(args.callback);
}
Status ret;
// parse inputs.dims to vector<vector<uint64_t>> dynamic_dims
if (graph_manager->ParseInputsDims(args.input_tensor) != SUCCESS) {
GELOGE(PARAM_INVALID, "Parse input dims failed.");
ret = graph_manager->ParseInputsDims(args.input_tensor);
if (ret != SUCCESS) {
ReturnError(graph_manager, args.callback, ret, "ParseInputsDims failed, thread exit.");
args.graph_node->Unlock();
return;
}

Status ret;
if (!args.graph_node->GetLoadFlag()) {
ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node);
if (ret != SUCCESS || args.ge_root_model == nullptr) {
@@ -2880,12 +2867,12 @@ void GraphManager::RunThread(GraphManager *graph_manager) {
ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(),
args.input_tensor);
args.graph_node->SetRunFlag(false);
args.graph_node->Unlock();
if (ret != SUCCESS) {
GELOGE(ret, "[GraphManager] Run graph async failed, graph_id=%u.", args.graph_id);
StopQueue(graph_manager);
ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit.");
args.graph_node->Unlock();
return;
}
args.graph_node->Unlock();
GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id);
}
}


+ 7
- 7
ge/graph/manager/graph_var_manager.cc View File

@@ -92,13 +92,13 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
if (var_addr_mgr_map_.count(var_key) == 0) {
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str());
VarAddrMgr var_addr_mgr;
var_addr_mgr.address = reinterpret_cast<uint8_t *>(reinterpret_cast<std::uintptr_t>(logic_address));
var_addr_mgr.offset = reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
var_addr_mgr.address = reinterpret_cast<uint8_t *>(static_cast<std::uintptr_t>(logic_address));
var_addr_mgr.offset = static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
var_addr_mgr.tensor_desc = tensor_desc;
var_addr_mgr.memory_type = memory_type;
var_addr_mgr_map_[var_key] = var_addr_mgr;
@@ -510,7 +510,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
}

result = var_resource_->SaveVarAddr(
var_name, tensor_desc, reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type);
var_name, tensor_desc, reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type);
if (result != SUCCESS) {
GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed.");
return ge::INTERNAL_ERROR;
@@ -527,7 +527,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc);
if (result != SUCCESS) {
var_resource_->SetVarAddr(var_name, tensor_desc,
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type);
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type);
return SUCCESS;
}

@@ -542,7 +542,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen
ge::TypeUtils::FormatToSerialString(cur_tensor_desc.GetFormat()).c_str(),
cur_tensor_desc.GetShape().GetDims().size());
var_resource_->SetVarAddr(var_name, tensor_desc,
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type);
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type);
}

return SUCCESS;
@@ -642,7 +642,7 @@ ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::strin
bool VarManager::IsVarAddr(const int64_t &offset) {
std::lock_guard<std::recursive_mutex> lock(mutex_);
if (var_resource_ == nullptr) {
GELOGW("VarManager has not been init.");
GELOGD("VarManager has not been init.");
return false;
}
return var_resource_->IsVarAddr(offset);


+ 4
- 4
ge/graph/manager/trans_var_data_utils.cc View File

@@ -374,7 +374,7 @@ Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge
GE_MAKE_GUARD_RTMEM(src_host_addr);
GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id));

GELOGI("src_addr_size: %u, dst_addr_size: %u", src_addr_size, dst_addr_size);
GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size);
GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, "var data size is not equal broadcast ");

GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE));
@@ -403,7 +403,7 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr));
uint8_t *mem_addr =
src_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size));
@@ -420,7 +420,7 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr));
uint8_t *mem_addr =
dst_addr -
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) +
static_cast<int64_t>(
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM)));
GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE));
@@ -501,7 +501,7 @@ Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes,
}

Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id) {
GELOGI("CopyVarData start: session_id:%lu.", session_id);
GELOGD("CopyVarData start: session_id:%lu.", session_id);
if (compute_graph == nullptr) {
GELOGE(FAILED, "compute_graph is nullptr");
return FAILED;


+ 3
- 3
ge/graph/manager/util/debug.cc View File

@@ -32,7 +32,7 @@ Debug::~Debug() = default;

void Debug::DumpProto(const Message &proto, const char *file) {
std::string file_path = RealPath(file);
int fd = open(file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD);
if (fd == -1) {
GELOGW("Write %s failed", file_path.c_str());
return;
@@ -40,7 +40,7 @@ void Debug::DumpProto(const Message &proto, const char *file) {
auto output = ge::MakeShared<FileOutputStream>(fd);
if (output == nullptr) {
GELOGW("create output failed.");
if (close(fd) != 0) {
if (mmClose(fd) != 0) {
GELOGW("close fd failed.");
}
return;
@@ -49,7 +49,7 @@ void Debug::DumpProto(const Message &proto, const char *file) {
if (!ret) {
GELOGW("dump proto failed.");
}
if (close(fd) != 0) {
if (mmClose(fd) != 0) {
GELOGW("close fd failed.");
}
}


+ 0
- 2
ge/graph/manager/util/debug.h View File

@@ -17,7 +17,6 @@
#ifndef GE_GRAPH_MANAGER_UTIL_DEBUG_H_
#define GE_GRAPH_MANAGER_UTIL_DEBUG_H_

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
@@ -25,7 +24,6 @@
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/text_format.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <iosfwd>


+ 3
- 4
ge/graph/optimize/graph_optimize.cc View File

@@ -58,8 +58,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) {

for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
GE_IF_BOOL_EXEC(
peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr! node: %s", node->GetName().c_str()); continue);
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);

ge::NodePtr src_node = peer_out_anchor->GetOwnerNode();
src_index_list = node_op_desc->GetSrcIndex();
@@ -242,11 +241,11 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr
}

auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority();
GELOGI("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.",
GELOGD("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.",
graph_optimizer.size());
Status ret = SUCCESS;
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine;
GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s",
GELOGD("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s",
exclude_core_Type.c_str(), core_type_.c_str());
if (graph_optimizer.size() != 0) {
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) {


+ 0
- 397
ge/graph/optimize/optimizer/allreduce_fusion_pass.cc View File

@@ -1,397 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "graph/optimize/optimizer/allreduce_fusion_pass.h"
#include <string>
#include "common/debug/log.h"
#include "framework/common/debug/ge_log.h"
#include "common/types.h"
#include "common/util.h"
#include "graph/anchor.h"
#include "graph/node.h"
#include "graph/op_desc.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/graph_utils.h"
#include "graph/utils/tensor_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "hccl/base.h"
#include "hccl/hcom.h"

namespace ge {
/// @brief Fuse the HcomAllReduce nodes of a graph into one node per segment.
///
/// Steps performed, in order:
///  1. Collect every direct node of type HCOMALLREDUCE whose HCOM_ATTR_FUSION
///     attribute is > 0 (the attribute defaults to 1 when it cannot be read).
///  2. Ask hcom_get_split_strategy() how the collected nodes are to be split
///     into segments; segmentIndex[i] holds the index of the last node of
///     segment i.
///  3. Validate the returned segmentation.
///  4. For every segment holding more than one node: detach all edges of the
///     segment's nodes, remove the nodes, add a single node cloned from the
///     first node's OpDesc, and reconnect the recorded edges to it. Successive
///     fused nodes are chained with a control edge.
///
/// @param graph  Graph to be rewritten in place.
/// @return SUCCESS on completion; NOT_CHANGED when there is nothing to fuse;
///         FAILED when the split strategy cannot be obtained or is illegal;
///         otherwise the status propagated by the failing helper/graph call.
Status AllReducePass::Run(ge::ComputeGraphPtr graph) {
  GE_CHECK_NOTNULL(graph);
  GELOGI("FusionAllReducePass: start");
  std::vector<NodePtr> fusionOps;
  std::vector<float> inputGradientSize;
  std::vector<float> inputGradientTime;

  // Placeholder size/time values: one entry is pushed per collected node.
  static const float inputGradientSizeTemp = 0.0;
  static const float inputGradientTimeTemp = 0.0;

  // Get all nodes
  for (auto nodePtr : graph->GetDirectNode()) {
    if (nullptr == nodePtr) {
      GELOGW("FusionAllReducePass: null node exists");
      continue;
    }

    ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc();
    if (nullptr == opDescPtr) {
      GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str());
      continue;
    }
    if (!(HCOMALLREDUCE == opDescPtr->GetType())) {
      continue;
    }

    // the op is allreduce and fusion > 0, then run fusion
    std::int64_t hcom_fusion = 1;
    if (!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion)) {
      GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr "
             "by HCOM_ATTR_FUSION");
    }
    GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion);
    if (hcom_fusion > 0) {
      fusionOps.push_back(nodePtr);
      inputGradientSize.push_back(inputGradientSizeTemp);
      inputGradientTime.push_back(inputGradientTimeTemp);
    }
  }

  // The number of allreduce operators must be more than 1
  if (1 >= fusionOps.size()) {
    GELOGW("FusionAllReducePass NOT_CHANGED: the graph has "
           "%zu allreduce operator",
           fusionOps.size());
    return NOT_CHANGED;
  }

  string group = "group";
  u32 gradientNum = fusionOps.size();
  string model_name_str = graph->GetName();
  const char *model_name = model_name_str.c_str();
  model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()};

  u32 segmentNum = 0;
  u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {};

  // Call HCCL function: hcom_get_split_strategy
  GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy");
  if (HCCL_SUCCESS !=
      hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM, &segmentNum, segmentIndex)) {
    GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %zu allreduce operator", fusionOps.size());
    return FAILED;
  }
  GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully");

  // check whether segmentNum is legal or not
  if (HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum) {
    GELOGE(FAILED,
           "FusionAllReducePass FAILED: illegal segmentNum=%u, "
           "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u",
           segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum);
    return FAILED;
  }

  // check whether segmentIndex is legal or not: the last segment must end on the last collected node
  if (segmentIndex[segmentNum - 1] != gradientNum - 1) {
    GELOGE(FAILED,
           "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, "
           "segmentIndex[segmentNum-1]=%u, gradientNum=%u",
           segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum);
    return FAILED;
  }

  // segment end indices must be strictly increasing
  for (uint32_t i = 0; i < segmentNum - 1; i++) {
    if (segmentIndex[i] >= segmentIndex[i + 1]) {
      GELOGE(FAILED,
             "FusionAllReducePass FAILED: illegal "
             "segmentIndex[%u]=%u, segmentIndex[%u]=%u",
             i, segmentIndex[i], i + 1, segmentIndex[i + 1]);
      return FAILED;
    }
  }

  // check whether fusion is needed or not: one segment per node means nothing can be merged.
  // This is a benign outcome, so it is reported as a warning like the NOT_CHANGED case above.
  if (segmentNum == gradientNum) {
    GELOGW("FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum);
    return NOT_CHANGED;
  }

  // Per-segment scratch buffers; anchorPtrSet de-duplicates anchors already recorded in the vectors below.
  std::unordered_set<void *> anchorPtrSet;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl;
  std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor;
  std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor;
  std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData;
  std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor;
  ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr;

  // Traversing the segmentNum; [start, end] is the index range of the current segment inside fusionOps
  uint32_t start = 0;
  uint32_t end = 0;
  for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) {
    end = segmentIndex[segmentIdx];
    // A segment holding a single node is left untouched.
    if (end - start < 1) {
      GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]);
      start = end + 1;
      continue;
    }

    // The fused node starts as a clone of the segment's first node.
    ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc();
    GE_CHECK_NOTNULL(originDescPtr);
    ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr);
    GE_CHECK_NOTNULL(newAllreduceDesc);

    // Clear buffers
    anchorPtrSet.clear();
    fusionOpPeerOutDataAnchor.clear();
    fusionOpPeerOutDataToInControl.clear();
    fusionOpPeerOutControlAnchor.clear();
    fusionOpPeerInDataAnchor.clear();
    fusionOpPeerInControlFromOutData.clear();
    fusionOpPeerInControlAnchor.clear();

    // Traversing the Allreduce operators of each group.
    // First node: its descs are already part of the clone, so only edges are recorded and removed.
    int outDataAnchorIndex = 0;
    GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]),
                      "Get peer outDataAnchor to inDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor,
                                               fusionOpPeerInControlFromOutData, fusionOps[start]),
                      "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]),
                      "Get peer outDataAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor from inControlAnchor failed");
    GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.",
                      fusionOps[start]->GetName().c_str());

    // Remaining nodes: their input/output descs are appended to the clone while edges are recorded.
    for (uint32_t idx = start + 1; idx <= end; idx++) {
      GE_CHK_STATUS_RET(
        GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc),
        "Get peer outDataAnchor to inDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]),
                        "Get peer outDataAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(
        GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                 fusionOps[idx], newAllreduceDesc, outDataAnchorIndex),
        "Get peerAnchor from outDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor from inControlAnchor failed");

      // Delete the node
      GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.",
                        fusionOps[idx]->GetName().c_str());
    }

    NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc);
    GE_CHECK_NOTNULL(newAllReducePtr);
    // Link the inputDataAnchor: the i-th recorded producer feeds the i-th data input of the fused node
    for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))),
        "FusionAllReducePass FAILED: add input data edge failed");
    }

    // Link the inputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add input control edge failed");
    }

    for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add edge from out data to incontrol "
                        "failed");
    }

    // Link the outputDataAnchor: .first is the output index on the fused node, .second the consumer anchor
    for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) {
      auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor),
        "FusionAllReducePass FAILED: add output data edge failed");
    }
    for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) {
      auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second;
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first),
                            peerInControlAnchor),
        "FusionAllReducePass FAILED: add edge from out data to in control "
        "failed");
    }

    // Link the outputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]),
                        "FusionAllReducePass FAILED: add output control edge failed");
    }

    // Link the newAllreduce: chain it after the previously created fused node with a control edge
    if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) {
      GE_CHK_STATUS_RET(
        GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()),
        "FusionAllReducePass FAILED: add input previous control edge failed");
    }

    previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor();
    start = end + 1;
  }

  return SUCCESS;
}

/// @brief Record and detach the data producers feeding a node.
///
/// Every peer out-data anchor connected to a data input of `srcNodePtr` that is
/// not yet present in `anchorSet` is appended to `peerOutDataAnchorVec`, added
/// to `anchorSet`, and its edge to the input is removed. Producers already in
/// the set are skipped and their edge is left in place.
///
/// @return SUCCESS, or the status of the first failing edge removal.
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr) {
  for (auto inputAnchor : srcNodePtr->GetAllInDataAnchors()) {
    if (inputAnchor == nullptr) {
      continue;
    }
    OutDataAnchorPtr producerAnchor = inputAnchor->GetPeerOutAnchor();
    if (producerAnchor == nullptr) {
      continue;
    }
    // Only the first occurrence of a producer is recorded.
    if (anchorSet.find(producerAnchor.get()) != anchorSet.end()) {
      continue;
    }
    peerOutDataAnchorVec.push_back(producerAnchor);
    anchorSet.insert(producerAnchor.get());
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(producerAnchor, inputAnchor));
  }
  return SUCCESS;
}

/// @brief Record and detach every consumer of a node's data outputs.
///
/// For each out-data anchor of `srcNodePtr`, the peer in-data anchors and the
/// peer in-control anchors not yet present in `anchorSet` are recorded (paired
/// with output index 0), added to `anchorSet`, and disconnected from the output.
///
/// @param anchorSet                         Anchors already recorded; updated in place.
/// @param fusionOpPeerInDataAnchor          Receives (0, consumer in-data anchor) pairs.
/// @param fusionOpPeerInControlFromOutData  Receives (0, consumer in-control anchor) pairs.
/// @param srcNodePtr                        Node whose output edges are detached.
/// @return SUCCESS, or the status of the first failing edge removal.
Status AllReducePass::GetPeerInAnchorToOutData(
    std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
    std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) {
  for (auto outDataAnchor : srcNodePtr->GetAllOutDataAnchors()) {
    if (outDataAnchor == nullptr) {
      continue;
    }

    for (auto peerInDataAnchor : outDataAnchor->GetPeerInDataAnchors()) {
      if (peerInDataAnchor == nullptr) {
        continue;
      }
      if (anchorSet.count(peerInDataAnchor.get()) == 0) {
        // Built in place with the container's own element type.
        fusionOpPeerInDataAnchor.emplace_back(0, peerInDataAnchor);
        anchorSet.insert(peerInDataAnchor.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInDataAnchor));
      }
    }

    for (auto peerInControlAnchorFromData : outDataAnchor->GetPeerInControlAnchors()) {
      if (peerInControlAnchorFromData == nullptr) {
        continue;
      }
      if (anchorSet.count(peerInControlAnchorFromData.get()) == 0) {
        // The index is stored as int, matching the vector's pair<int, ...> element type
        // (previously a pair<uint32_t, ...> was built and implicitly converted).
        fusionOpPeerInControlFromOutData.emplace_back(0, peerInControlAnchorFromData);
        anchorSet.insert(peerInControlAnchorFromData.get());
        GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outDataAnchor, peerInControlAnchorFromData));
      }
    }
  }
  return SUCCESS;
}

/// @brief Record and detach the data producers feeding a node, mirroring each
///        recorded input on a destination OpDesc.
///
/// Behaves like the three-argument overload and, for every newly recorded
/// producer, additionally appends the matching input tensor desc of the owning
/// node to `dstOpDescPtr`. A failure to add the desc is only logged as a warning.
///
/// @return SUCCESS, or the status of the first failing edge removal.
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) {
  for (auto inputAnchor : srcNodePtr->GetAllInDataAnchors()) {
    if (inputAnchor == nullptr) {
      continue;
    }
    OutDataAnchorPtr producerAnchor = inputAnchor->GetPeerOutAnchor();
    if (producerAnchor == nullptr) {
      continue;
    }
    // Producers seen before are neither recorded nor disconnected again.
    if (anchorSet.find(producerAnchor.get()) != anchorSet.end()) {
      continue;
    }
    peerOutDataAnchorVec.push_back(producerAnchor);
    anchorSet.insert(producerAnchor.get());

    // Copy the input desc at this anchor's index from the owning node onto the destination desc.
    const auto addStatus = dstOpDescPtr->AddInputDesc(
      inputAnchor->GetOwnerNode()->GetOpDesc()->GetInputDesc(inputAnchor->GetIdx()));
    if (addStatus != ge::GRAPH_SUCCESS) {
      GELOGW("GetPeerOutDataToInData: AddInputDesc failed");
    }
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(producerAnchor, inputAnchor));
  }
  return SUCCESS;
}

/// @brief Record and detach out-data anchors wired to a node's in-control anchor.
///
/// Each peer out-data anchor of the in-control anchor of `srcNodePtr` that is
/// not yet in `anchorSet` is appended to `peerOutDataToInControlVec`, added to
/// the set, and disconnected.
///
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         in-control anchor, or the status of the first failing edge removal.
Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                                vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec,
                                                ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr controlInput = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(controlInput);
  for (auto dataProducer : controlInput->GetPeerOutDataAnchors()) {
    if (dataProducer == nullptr) {
      continue;
    }
    if (anchorSet.find(dataProducer.get()) != anchorSet.end()) {
      continue;
    }
    peerOutDataToInControlVec.push_back(dataProducer);
    anchorSet.insert(dataProducer.get());
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataProducer, controlInput));
  }
  return SUCCESS;
}

/// @brief Record and detach out-control anchors wired to a node's in-control anchor.
///
/// Each peer out-control anchor of the in-control anchor of `srcNodePtr` that
/// is not yet in `anchorSet` is appended to `peerOutControlToInControlVec`,
/// added to the set, and disconnected.
///
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         in-control anchor, or the status of the first failing edge removal.
Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                                   vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                                   ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr controlInput = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(controlInput);
  for (auto controlProducer : controlInput->GetPeerOutControlAnchors()) {
    if (controlProducer == nullptr) {
      continue;
    }
    if (anchorSet.find(controlProducer.get()) != anchorSet.end()) {
      continue;
    }
    peerOutControlToInControlVec.push_back(controlProducer);
    anchorSet.insert(controlProducer.get());
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(controlProducer, controlInput));
  }
  return SUCCESS;
}

/// @brief Record and detach every consumer of a node's data outputs while
///        mirroring each used output on a destination OpDesc.
///
/// For each out-data anchor of `srcNodePtr` that has at least one data or
/// control consumer, the matching output desc of the owning node is appended to
/// `dstOpDescPtr` (a failure is only logged) and `index` is incremented. Newly
/// seen consumers are then recorded together with the current value of `index`,
/// added to `anchorSet`, and disconnected from the output.
///
/// @param index  Running output index, updated in place across calls.
/// @return SUCCESS, or the status of the first failing edge removal.
Status AllReducePass::GetPeerAnchorFromOutData(
    std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
    vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr,
    ge::OpDescPtr &dstOpDescPtr, int &index) {
  for (auto sourceOutput : srcNodePtr->GetAllOutDataAnchors()) {
    if (sourceOutput == nullptr) {
      continue;
    }

    const bool hasDataConsumers = sourceOutput->GetPeerInDataAnchors().size() > 0;
    if (hasDataConsumers || sourceOutput->GetPeerInControlAnchors().size() > 0) {
      const auto addStatus = dstOpDescPtr->AddOutputDesc(
        sourceOutput->GetOwnerNode()->GetOpDesc()->GetOutputDesc(sourceOutput->GetIdx()));
      if (addStatus != ge::GRAPH_SUCCESS) {
        GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed");
      }
      // The index advances even when adding the desc failed.
      ++index;
    }

    for (auto dataConsumer : sourceOutput->GetPeerInDataAnchors()) {
      if (dataConsumer == nullptr) {
        continue;
      }
      if (anchorSet.find(dataConsumer.get()) != anchorSet.end()) {
        continue;
      }
      peerInDataFromOutDataVec.emplace_back(index, dataConsumer);
      anchorSet.insert(dataConsumer.get());
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(sourceOutput, dataConsumer))
    }

    for (auto controlConsumer : sourceOutput->GetPeerInControlAnchors()) {
      if (controlConsumer == nullptr) {
        continue;
      }
      if (anchorSet.find(controlConsumer.get()) != anchorSet.end()) {
        continue;
      }
      peerInControlFromOutDataVec.emplace_back(index, controlConsumer);
      anchorSet.insert(controlConsumer.get());
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(sourceOutput, controlConsumer))
    }
  }
  return SUCCESS;
}

/// @brief Record and detach in-control anchors driven by a node's out-control anchor.
///
/// Each peer in-control anchor of the out-control anchor of `srcNodePtr` that
/// is not yet in `anchorSet` is appended to `peerInControlFromOutControlVec`,
/// added to the set, and disconnected.
///
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         out-control anchor, or the status of the first failing edge removal.
Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                                     vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                                     ge::NodePtr &srcNodePtr) {
  OutControlAnchorPtr controlOutput = srcNodePtr->GetOutControlAnchor();
  GE_CHECK_NOTNULL(controlOutput);
  for (auto controlConsumer : controlOutput->GetPeerInControlAnchors()) {
    if (controlConsumer == nullptr) {
      continue;
    }
    if (anchorSet.find(controlConsumer.get()) != anchorSet.end()) {
      continue;
    }
    peerInControlFromOutControlVec.push_back(controlConsumer);
    anchorSet.insert(controlConsumer.get());
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(controlOutput, controlConsumer))
  }
  return SUCCESS;
}
} // namespace ge

+ 0
- 56
ge/graph/optimize/optimizer/allreduce_fusion_pass.h View File

@@ -1,56 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_
#define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

#include <unordered_set>
#include <utility>
#include <vector>
#include "inc/graph_pass.h"

namespace ge {
/// Graph pass that fuses groups of HcomAllReduce nodes into single nodes.
///
/// The private helpers all follow the same pattern: they walk one kind of edge
/// of `srcNodePtr`, record the peer anchors not yet present in `anchorSet` into
/// the given output vector, add them to `anchorSet`, and remove the edge.
class AllReducePass : public GraphPass {
 public:
  /// Runs the fusion on `graph`, rewriting it in place.
  Status Run(ge::ComputeGraphPtr graph) override;

 private:
  /// Detaches the data producers of `srcNodePtr` and appends the matching input
  /// desc to `dstOpDescPtr` for each recorded producer.
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr,
                                ge::OpDescPtr &dstOpDescPtr);
  /// Detaches out-data anchors connected to the in-control anchor of `srcNodePtr`.
  Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                   std::vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec,
                                   ge::NodePtr &srcNodePtr);
  /// Detaches out-control anchors connected to the in-control anchor of `srcNodePtr`.
  Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                      std::vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                      ge::NodePtr &srcNodePtr);
  /// Detaches the consumers of the data outputs of `srcNodePtr`, appends each used
  /// output desc to `dstOpDescPtr`, and pairs consumers with the running `index`.
  Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet,
                                  std::vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
                                  std::vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec,
                                  ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index);
  /// Detaches in-control anchors connected to the out-control anchor of `srcNodePtr`.
  Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                        std::vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                        ge::NodePtr &srcNodePtr);
  /// Detaches the data producers of `srcNodePtr` without touching any OpDesc.
  Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                ge::NodePtr &srcNodePtr);
  /// Detaches the consumers of the data outputs of `srcNodePtr`, pairing each
  /// recorded consumer with output index 0.
  Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet,
                                  std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
                                  std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData,
                                  ge::NodePtr &srcNodePtr);
};
} // namespace ge
#endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_

+ 2
- 2
ge/graph/partition/engine_place.cc View File

@@ -50,7 +50,7 @@ Status EnginePlacer::Check() const {
Status EnginePlacer::Run() {
std::lock_guard<std::mutex> lock(check_support_cost_mutex);

GELOGI("Engine placer starts.");
GELOGD("Engine placer starts.");
if (Check() != SUCCESS) {
return FAILED;
}
@@ -101,7 +101,7 @@ Status EnginePlacer::Run() {
for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) {
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second);
}
GELOGI("Engine placer ends.");
GELOGD("Engine placer ends.");
return is_check_support_success ? SUCCESS : FAILED;
}



+ 6
- 6
ge/graph/partition/graph_partition.cc View File

@@ -223,7 +223,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co
GELOGE(GE_GRAPH_UNSUPPORTED, "Cannot call merging in partition mode");
return FAILED;
}
GELOGI("Graph merge starts.");
GELOGD("Graph merge starts.");
// check input param
for (const auto &it : sub_graph_list) {
if (it == nullptr) {
@@ -261,7 +261,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co
return FAILED;
}
GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun");
GELOGI("Graph merge ends.");
GELOGD("Graph merge ends.");
return SUCCESS;
}

@@ -581,7 +581,7 @@ Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) {
new_cluster->engine_name_.c_str(), new_cluster->index_, new_cluster->stream_label_.c_str());
temp_index++;
}
GELOGI("Initialize ends.");
GELOGD("Initialize ends.");
return SUCCESS;
}

@@ -754,11 +754,11 @@ void ge::GraphPartitioner::MarkClusters() {
}
}
}
GELOGI("MarkClusters ends.");
GELOGD("MarkClusters ends.");
}

Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) {
GELOGI("SplitSubGraphs starts.");
GELOGD("SplitSubGraphs starts.");
if (compute_graph == nullptr) {
GELOGE(FAILED, "parameter ptr is null.");
return FAILED;
@@ -823,7 +823,7 @@ Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) {
}
}
}
GELOGI("SplitSubGraphs ends.");
GELOGD("SplitSubGraphs ends.");
return SUCCESS;
}



+ 1
- 1
ge/graph/passes/atomic_addr_clean_pass.cc View File

@@ -46,7 +46,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) {
}
}
if (atomic_node_vec.empty()) {
GELOGI("There is no atomic node. Ignore atomicAddrClean pass.");
GELOGD("There is no atomic node. Ignore atomicAddrClean pass.");
return SUCCESS;
}



+ 1
- 1
ge/graph/passes/cond_remove_pass.cc View File

@@ -332,7 +332,7 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph,
return FAILED;
}
} else {
GELOGI("no need cond_remove_pass for node %s.", node->GetName().c_str());
GELOGD("no need cond_remove_pass for node %s.", node->GetName().c_str());
return NOT_CHANGED;
}



+ 11
- 0
ge/graph/passes/mark_agnostic_pass.cc View File

@@ -16,6 +16,7 @@
#include "graph/passes/mark_agnostic_pass.h"

#include "graph/utils/node_utils.h"
#include "graph/utils/tensor_utils.h"

namespace ge {
Status MarkAgnosticPass::Run(ComputeGraphPtr graph) {
@@ -47,6 +48,16 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) {
}
if (node_type == MERGE) {
GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str());
auto in_nodes = node->GetInAllNodes();
vector<NodePtr> input_nodes(in_nodes.begin(), in_nodes.end());
/// Enter-----------+
/// +-> Merge
/// NextIteration---+
if (input_nodes.size() == 2) {
if (input_nodes[0]->GetType() == ENTER && input_nodes[1]->GetType() == NEXTITERATION) {
continue;
}
}
const OpDescPtr op_desc = node->GetOpDesc();
const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0);
if (op_tensor == nullptr) {


+ 1
- 1
ge/graph/passes/memcpy_addr_async_pass.cc View File

@@ -278,7 +278,7 @@ Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &ou
}

Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node) {
GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str());
GELOGD("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str());
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx());
GE_CHECK_NOTNULL(in_node);


+ 2
- 2
ge/graph/passes/multi_batch_pass.cc View File

@@ -33,7 +33,7 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) {
OutDataAnchorPtr pred_value = nullptr;
Status ret = FindPredValue(graph, pred_value);
if (ret == NOT_CHANGED) {
GELOGI("SwitchN node not exist, graph not changed.");
GELOGD("SwitchN node not exist, graph not changed.");
return SUCCESS;
}
if (ret != SUCCESS) {
@@ -158,7 +158,7 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor
}

if (switch_n_nodes_.empty()) {
GELOGI("SwitchN node not exist.");
GELOGD("SwitchN node not exist.");
return NOT_CHANGED;
}



+ 2
- 2
ge/graph/passes/set_input_output_offset_pass.cc View File

@@ -128,7 +128,7 @@ Status SetInputOutputOffsetPass::SetInputOffsetForHcom(const ge::NodePtr &node,
}

Status SetInputOutputOffsetPass::SetInputOffset(const NodePtr &node, const vector<int> &connect_input) {
GELOGI("Start to SetInputOffset for %s.", node->GetName().c_str());
GELOGD("Start to SetInputOffset for %s.", node->GetName().c_str());
std::vector<int64_t> memory_type;
auto op_desc = node->GetOpDesc();
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type);
@@ -241,7 +241,7 @@ Status SetInputOutputOffsetPass::SetOutputOffsetForHcom(const NodePtr &node, con
}

Status SetInputOutputOffsetPass::SetOutputOffset(const NodePtr &node, const vector<int> &connect_output) {
GELOGI("Start SetOutputOffset of %s.", node->GetName().c_str());
GELOGD("Start SetOutputOffset of %s.", node->GetName().c_str());
bool attr_no_task = false;
bool get_attr_no_task = ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NOTASK, attr_no_task);
if (get_attr_no_task && attr_no_task) {


+ 2
- 0
ge/graph/preprocess/graph_preprocess.cc View File

@@ -117,6 +117,7 @@
#include "graph/passes/variable_op_pass.h"
#include "graph/passes/variable_prepare_op_pass.h"
#include "graph/passes/variable_ref_delete_op_pass.h"
#include "graph/passes/mark_agnostic_pass.h"


namespace ge {
@@ -1700,6 +1701,7 @@ Status GraphPrepare::PrepareOptimize() {
try {
(void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass);
(void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass);
(void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass" , new MarkAgnosticPass);
} catch (std::bad_alloc &e) {
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs.");
return INTERNAL_ERROR;


+ 4
- 3
ge/graph/preprocess/multi_batch_copy_graph.cc View File

@@ -1571,6 +1571,10 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node,
// Connect NetOutput directly
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node,
const set<size_t> &dynamic_output_index, vector<string> &dynamic_output_dims) {
if (!GetLocalOmgContext().dynamic_node_type.empty()) {
GELOGD("No need to get directly shape info of %s when train.", node->GetName().c_str());
return;
}
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str());
const auto &netoutput_desc = node->GetOpDesc();
const auto &inputnode_to_netoutput = node->GetInAllNodes();
@@ -1578,9 +1582,6 @@ void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node,
if (dynamic_output_index.count(i) > 0) {
continue;
}
if (inputnode_to_netoutput.at(i)->GetType() == GETDYNAMICDIMS) {
continue;
}

auto tensor_desc = netoutput_desc->GetInputDesc(i);
auto shape = tensor_desc.GetShape().ToString();


+ 6
- 0
ge/graph/preprocess/multi_batch_options.cc View File

@@ -84,8 +84,10 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n
if (op_desc->GetType() == DATA && op_desc->GetName() != kShapeDataName) {
if (op_desc->GetName().find(kSubstrOfGetNextNosinkName) == string::npos) {
data_nodes.emplace_back(input_node);
GELOGD("Name of data node is %s.", op_desc->GetName().c_str());
} else {
getnext_nosink_nodes.emplace_back(input_node);
GELOGD("Name of getnext nosink is %s.", op_desc->GetName().c_str());
}
}
if (IsGetNextType(input_node)) {
@@ -111,6 +113,8 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector<NodePtr> &data_n
GE_CHECK_NOTNULL(data_node->GetOpDesc());
auto output_shape = data_node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims();
auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second;
GELOGD("The %zu data node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(),
formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str());
if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) {
GELOGI("No need to check sequence for constant.");
continue;
@@ -151,6 +155,8 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector<NodePtr> &get
for (size_t i = 0; i < data_count; ++i) {
auto output_shape = data_node->GetOpDesc()->GetOutputDesc(i).GetShape().GetDims();
auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second;
GELOGD("The %zu getnext node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(),
formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str());
if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) {
GELOGI("No need to check sequence for constant.");
continue;


+ 20
- 0
ge/host_cpu_engine/module.mk View File

@@ -80,6 +80,26 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_HOST_SHARED_LIBRARY}

#compiler for device ops kernel builder
# Declares the module libhost_cpu_opskernel_builder and builds it with
# BUILD_SHARED_LIBRARY (the preceding module in this file is built with
# BUILD_HOST_SHARED_LIBRARY instead).
# CLEAR_VARS is included first so LOCAL_* settings from the previous module
# are not carried over into this one.
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder
# Warnings are treated as errors for this module.
LOCAL_CFLAGS += -Werror
# -Dgoogle=ascend_private makes the preprocessor replace the identifier
# `google` with `ascend_private` in every source of this module.
# NOTE(review): presumably this matches the namespace used by
# libascend_protobuf listed below -- confirm.
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=

# No static libraries are linked; all dependencies are shared libraries.
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
libregister \

# Single translation unit: the host CPU ops kernel builder implementation.
LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc

# Include paths come from local_lib_inc_path, which is defined outside this block.
LOCAL_C_INCLUDES := $(local_lib_inc_path)

include ${BUILD_SHARED_LIBRARY}

#compiler for host static lib
include $(CLEAR_VARS)
LOCAL_MODULE := libhost_cpu_opskernel_builder


+ 2
- 2
ge/hybrid/executor/hybrid_execution_context.h View File

@@ -57,9 +57,9 @@ struct GraphExecutionContext {
do { \
if ((context != nullptr) && (context)->profiler != nullptr) { \
if (node_name != nullptr) { \
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, ##__VA_ARGS__);\
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\
} else { \
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \
}\
} \
} while (0)


+ 36
- 0
ge/hybrid/node_executor/aicpu/aicpu_ext_info.cc View File

@@ -57,6 +57,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) {
case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE:
GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed.");
break;
case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO:
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed.");
break;
default:
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.",
node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen);
@@ -123,6 +126,39 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) {
return SUCCESS;
}

// Handles the FWK_ADPT_EXT_SESSION_INFO entry of the ext info.
//
// Validates that the entry payload is exactly one AicpuSessionInfo and then
// remembers a pointer to it in session_info_. The pointer aliases
// aicpu_ext_info->infoMsg (no copy is made), so the later UpdateSessionInfo*
// calls write straight into that entry.
//
// @param aicpu_ext_info  ext info entry to parse; dereferenced without a null check.
// @return SUCCESS on success, PARAM_INVALID when infoLen does not match
//         sizeof(AicpuSessionInfo).
Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
  // The expected size in the message is spelled with the same alias as the check,
  // so the reported value can never drift away from the value being compared.
  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID,
                         "Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
                         node_name_.c_str(), sizeof(AicpuSessionInfo), aicpu_ext_info->infoLen);

  // Keep an in-place view of the payload rather than a copy.
  session_info_ = reinterpret_cast<AicpuSessionInfo *>(aicpu_ext_info->infoMsg);
  GELOGI("Node[%s] parse session info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
  return SUCCESS;
}

// Writes session id, kernel id and session flag into the parsed session info.
//
// When no session info entry was parsed (session_info_ is still nullptr) this
// only emits a debug log. SUCCESS is returned in both cases.
Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) {
  if (session_info_ != nullptr) {
    AicpuSessionInfo &info = *session_info_;
    info.sessionId = session_id;
    info.kernelId = kernel_id;
    info.sessFlag = sess_flag;
  } else {
    GELOGD("There is no session info in ext_info, no need update.");
  }
  return SUCCESS;
}

// Writes only the session id into the parsed session info and sets sessFlag
// to true; kernelId is left as it is.
//
// When no session info entry was parsed (session_info_ is still nullptr) this
// only emits a debug log. SUCCESS is returned in both cases.
Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) {
  if (session_info_ != nullptr) {
    AicpuSessionInfo &info = *session_info_;
    info.sessionId = session_id;
    info.sessFlag = true;
  } else {
    GELOGD("There is no session info in ext_info, no need update.");
  }
  return SUCCESS;
}

Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc) {
GE_CHECK_LE(input_index, input_num_);
const auto &shape = input_desc.GetShape();


+ 8
- 0
ge/hybrid/node_executor/aicpu/aicpu_ext_info.h View File

@@ -19,6 +19,7 @@

#include "external/ge/ge_api_error_codes.h"
#include "cce/fwk_adpt_struct.h"
#include "cce/aicpu_engine_struct.h"
#include "graph/op_desc.h"
#include "graph/ge_tensor.h"

@@ -26,6 +27,7 @@ namespace ge {
namespace hybrid {
using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType;
using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo;
using AicpuSessionInfo = SessionInfo;

class AicpuExtInfoHandler {
public:
@@ -51,6 +53,10 @@ class AicpuExtInfoHandler {

Status UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc);

Status UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag);

Status UpdateSessionInfoSessionId(uint64_t session_id);

Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type);

private:
@@ -58,6 +64,7 @@ class AicpuExtInfoHandler {
Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info);
Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info);
Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info);
Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info);

static Status UpdateShapeAndType(const GeShape &shape,
DataType data_type,
@@ -72,6 +79,7 @@ class AicpuExtInfoHandler {
const uint32_t input_num_;
const uint32_t output_num_;
UnknowShapeOpType unknown_type_;
AicpuSessionInfo *session_info_ = nullptr;

std::unique_ptr<uint8_t[]> ext_info_;
size_t ext_info_len_ = 0;


+ 26
- 17
ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc View File

@@ -40,29 +40,36 @@ Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorB
return SUCCESS;
}

Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) {
if (node_item_->is_dynamic) {
// dynamic node must have ext info
GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info),
"Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.",
node_name_.c_str(), kernel_ext_info.size());
}

// if no ext info no need copy to device.
Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_t session_id) {
if (kernel_ext_info.empty()) {
GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.",
node_name_.c_str(), node_item_->is_dynamic ? "true" : "false");
return SUCCESS;
if (node_item_->is_dynamic) {
// dynamic node must have ext info
GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str());
return PARAM_INVALID;
} else {
// if no ext info no need copy to device.
GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.",
node_name_.c_str(), node_item_->is_dynamic ? "true" : "false");
return SUCCESS;
}
}

GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info),
"Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.",
node_name_.c_str(), kernel_ext_info.size());
GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id);
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id),
"UpdateSessionInfoSessionId failed.");

// copy task args buf
GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_),
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_),
"Node[%s] alloc kernel_ext_info buf failed, size=%zu",
node_name_.c_str(), kernel_ext_info.size());
node_name_.c_str(), aicpu_ext_handle_.GetExtInfoLen());

// copy default ext info to device
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(),
kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE));
aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(),
RT_MEMCPY_HOST_TO_DEVICE));

return SUCCESS;
}
@@ -290,7 +297,8 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) {
node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size);

// init ext info
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str());
uint64_t ext_session_id = model.GetSessionId();
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name_.c_str());
GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str());

// build fwk_op_kernel.
@@ -679,7 +687,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
"Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size);

GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str());
uint64_t ext_session_id = model.GetSessionId();
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name.c_str());

if (ext_info_addr_dev_ == nullptr) {
aicpu_param_head->extInfoLength = 0;


+ 1
- 1
ge/hybrid/node_executor/aicpu/aicpu_node_executor.h View File

@@ -43,7 +43,7 @@ class AicpuNodeTaskBase : public NodeTask {

Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
protected:
virtual Status InitExtInfo(const std::string &kernel_ext_info);
virtual Status InitExtInfo(const std::string &kernel_ext_info, int64_t session_id);

virtual Status UpdateExtInfo();



+ 5
- 5
ge/init/gelib.cc View File

@@ -110,7 +110,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) {
Status initSystemStatus = SystemInitialize(options);
GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize");
if (initSystemStatus != SUCCESS) {
GELOGE(initSystemStatus);
GELOGE(initSystemStatus, "GE system initial failed.");
RollbackInit();
return initSystemStatus;
}
@@ -120,7 +120,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) {
Status initEmStatus = engineManager_.Initialize(options);
GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize");
if (initEmStatus != SUCCESS) {
GELOGE(initEmStatus);
GELOGE(initEmStatus, "GE engine manager initial failed.");
RollbackInit();
return initEmStatus;
}
@@ -130,7 +130,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) {
Status initOpsStatus = opsManager_.Initialize(options);
GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize");
if (initOpsStatus != SUCCESS) {
GELOGE(initOpsStatus);
GELOGE(initOpsStatus, "GE ops manager initial failed.");
RollbackInit();
return initOpsStatus;
}
@@ -140,7 +140,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) {
Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options);
GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager");
if (initOpsBuilderStatus != SUCCESS) {
GELOGE(initOpsBuilderStatus);
GELOGE(initOpsBuilderStatus, "GE ops builder manager initial failed.");
RollbackInit();
return initOpsBuilderStatus;
}
@@ -150,7 +150,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) {
Status initSmStatus = sessionManager_.Initialize(options);
GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize");
if (initSmStatus != SUCCESS) {
GELOGE(initSmStatus);
GELOGE(initSmStatus, "GE session manager initial failed.");
RollbackInit();
return initSmStatus;
}


+ 1
- 1
ge/ir_build/atc_ir_common.cc View File

@@ -504,7 +504,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip
for (auto iter = options.begin(); iter != options.end(); iter++) {
std::string key = iter->first;
std::string option_name = iter->second;
GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str());
GELOGD("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str());
}
}



+ 6
- 6
ge/ir_build/ge_ir_build.cc View File

@@ -340,7 +340,7 @@ void Impl::SetRtSocVersion() {
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Set soc version %s failed. ret:0x%X", soc_version, rt_ret);
}
GELOGI("Set soc version %s success.", soc_version);
GELOGD("Set soc version %s success.", soc_version);
}
}

@@ -359,25 +359,25 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTe
GE_CHECK_NOTNULL(op);
if (op->GetType() == DATA) {
(void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++);
GELOGI("Data op inputDesc size: %zu", op->GetAllInputsDesc().size());
GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size());
ge::GeTensorDesc tensor = op->GetInputDesc(0);
string data_op_name = op->GetName();
GELOGI("Data op name: %s", data_op_name.c_str());
GELOGD("Data op name: %s", data_op_name.c_str());
ge::GeShape data_shape;
auto iter = omg_context_.input_dims.find(data_op_name);
if (iter != omg_context_.input_dims.end()) {
data_shape = ge::GeShape(iter->second);
GELOGI("Data op get shape from Context.");
GELOGD("Data op get shape from Context.");
} else {
data_shape = tensor.GetShape();
GELOGI("Data op get shape from InputDesc in ge ir graph.");
GELOGD("Data op get shape from InputDesc in ge ir graph.");
}
// If user point input format, do work for all data ops; else do according to tensor_desc
auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ?
ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat();
ge::DataType data_type = tensor.GetDataType();
string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type);
GELOGI("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str());
GELOGD("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str());

ge::GeTensor inputTensor;
ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type);


+ 1
- 1
ge/offline/CMakeLists.txt View File

@@ -69,7 +69,7 @@ target_link_libraries(atc PRIVATE
json
runtime_compile
slog
mmpa
static_mmpa
-lrt
-ldl
)


+ 9
- 7
ge/omm/csa_interact.cc View File

@@ -52,9 +52,11 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) {
if (!is_init_) {
dev_index_ = dev_index;
job_id_ = job_id;
char *file_dir_env = std::getenv(FMK_STATUS_FILE_DIR_ENV);

char file_dir_env[MMPA_MAX_PATH] = { 0x00 };
INT32 res = mmGetEnv(FMK_STATUS_FILE_DIR_ENV, file_dir_env, MMPA_MAX_PATH);
string csa_path_prefix;
if (file_dir_env != nullptr) {
if (res == EN_OK) {
csa_path_prefix = file_dir_env;
}
if (!csa_path_prefix.empty()) {
@@ -186,21 +188,21 @@ Status CsaInteract::WriteHcomDetection(const std::string &content) {
///
Status CsaInteract::WriteFile(const std::string &file_name, const std::string &content) {
// if file path is not exist, then make path
INT32 flags = O_WRONLY | O_TRUNC | O_CREAT;
int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP);
INT32 flags = M_WRONLY | O_TRUNC | M_CREAT;
int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD);
if (fd == EN_ERROR) {
if (MakePath(file_name) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "csainteract create file path fail, errno is %d", errno);
return INTERNAL_ERROR;
}
fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP);
fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD);
if (fd == EN_ERROR) {
GELOGE(INTERNAL_ERROR, "open file fail, errno is %d", errno);
return INTERNAL_ERROR;
}
}

ssize_t ret = write(fd, content.c_str(), content.length());
mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length());
if (ret == EN_ERROR) {
GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno);
ret = mmClose(fd);
@@ -239,7 +241,7 @@ Status CsaInteract::MakePath(const std::string &file_name) {
while (found != std::string::npos) {
std::string pre_path = file_path.substr(0, found + 1);
if (mmAccess(pre_path.c_str()) != EN_OK) {
if (mmMkdir(pre_path.c_str(), S_IRWXU) != EN_OK) {
if (mmMkdir(pre_path.c_str(), M_IRWXU) != EN_OK) {
GELOGE(INTERNAL_ERROR, "csainteract mkdir fail, errno is %d", errno);
return INTERNAL_ERROR;
}


+ 2
- 2
ge/opskernel_manager/ops_kernel_manager.cc View File

@@ -85,7 +85,7 @@ Status OpsKernelManager::Initialize(const map<string, string> &options_const) {
initialize_ = options;
Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_);
if (rst0 == FAILED) {
GELOGE(GE_OPS_GET_NO_VALID_SO);
GELOGE(GE_OPS_GET_NO_VALID_SO, "There is invalid so about OpsKernelInfoStore.");
return GE_OPS_GET_NO_VALID_SO;
}
Status rst1 =
@@ -391,7 +391,7 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name,
continue;
}
if (attrs.engineName == engine_name) {
GELOGI("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(),
GELOGD("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(),
attrs.engineName.c_str());
graph_optimizer.push_back(it.second);
}


+ 14
- 14
ge/session/session_manager.cc View File

@@ -61,7 +61,7 @@ Status SessionManager::SetRtContext(SessionId session_id, rtContext_t rt_context

Status SessionManager::CreateSession(const std::map<std::string, std::string> &options, SessionId &session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionId next_session_id = 0;
@@ -92,7 +92,7 @@ Status SessionManager::CreateSession(const std::map<std::string, std::string> &o

Status SessionManager::DestroySession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
std::lock_guard<std::mutex> lock(mutex_);
@@ -119,7 +119,7 @@ Status SessionManager::DestroySession(SessionId session_id) {

Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -143,7 +143,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -173,7 +173,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G
Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph,
const std::map<std::string, std::string> &options) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -203,7 +203,7 @@ Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id,
Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs,
std::vector<Tensor> &outputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -221,7 +221,7 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s

Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -239,7 +239,7 @@ Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) {

bool SessionManager::HasSession(SessionId session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return false;
}
return session_manager_map_.find(session_id) != session_manager_map_.end();
@@ -247,7 +247,7 @@ bool SessionManager::HasSession(SessionId session_id) {

Status SessionManager::GetNextSessionId(SessionId &next_session_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
static SessionId session_id = 0;
@@ -260,7 +260,7 @@ Status SessionManager::RegisterCallBackFunc(
SessionId session_id, const std::string &key,
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -278,7 +278,7 @@ Status SessionManager::RegisterCallBackFunc(

Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -297,7 +297,7 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const
Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id,
const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -317,7 +317,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:
std::vector<Tensor> &var_values) {
// step 0: init session manager
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return GE_SESSION_MANAGER_NOT_INIT;
}
SessionPtr innerSession = nullptr;
@@ -383,7 +383,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std:

bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) {
if (!init_flag_) {
GELOGE(GE_SESSION_MANAGER_NOT_INIT);
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized.");
return true;
}
SessionPtr innerSession = nullptr;


+ 3
- 15
ge/single_op/single_op.cc View File

@@ -44,8 +44,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() {
delete task;
task = nullptr;
}
GELOGI("SingleOp destory sessionId = %lu", aicpu_session_id_);
ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_);
}

Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs) {
@@ -59,7 +57,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
for (size_t i = 0; i < num_inputs; ++i) {
// preventing from read out of bound
size_t aligned_size = GetAlignedSize(inputs[i].length);
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu",
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%zu",
i, aligned_size, inputs[i].length, input_sizes_[i]);
if (aligned_size < input_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu,"
@@ -77,7 +75,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::
for (size_t i = 0; i < num_outputs; ++i) {
// preventing from write out of bound
size_t aligned_size = GetAlignedSize(outputs[i].length);
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu",
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%zu",
i, aligned_size, outputs[i].length, output_sizes_[i]);
if (aligned_size < output_sizes_[i]) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu,"
@@ -143,7 +141,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
GE_CHECK_NOTNULL(task_io_addr);
auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr));
for (size_t i = 0; i < io_addr_num; ++i) {
io_addr[i] = reinterpret_cast<uintptr_t>(args_[i]);
io_addr[i] = static_cast<uintptr_t>(args_[i]);
}
} else {
GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType());
@@ -180,17 +178,11 @@ void SingleOp::SetStream(rtStream_t stream) {
stream_ = stream;
}

void SingleOp::SetSessionID(uint64_t session_id) {
aicpu_session_id_ = session_id;
}

DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream)
: resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) {
}

DynamicSingleOp::~DynamicSingleOp() {
GELOGI("DynamicSingleOp destory sessionId = %lu", aicpu_session_id_);
ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_);
}

Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
@@ -299,8 +291,4 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
}
}

void DynamicSingleOp::SetSessionID(uint64_t session_id) {
aicpu_session_id_ = session_id;
}
} // namespace ge

+ 0
- 4
ge/single_op/single_op.h View File

@@ -37,7 +37,6 @@ class SingleOp {

Status ExecuteAsync(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
void SetStream(rtStream_t stream);
void SetSessionID(uint64_t session_id);

private:
Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
@@ -52,7 +51,6 @@ class SingleOp {
std::vector<void *> output_addr_list_;
std::vector<size_t> output_sizes_;
std::vector<uintptr_t> args_;
uint64_t aicpu_session_id_ = 0;

std::vector<OpTask *> tasks_;
std::vector<std::vector<uintptr_t *>> arg_table_;
@@ -66,7 +64,6 @@ class DynamicSingleOp {
const std::vector<DataBuffer> &inputs,
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs);
void SetSessionID(uint64_t session_id);

private:
friend class SingleOpModel;
@@ -89,7 +86,6 @@ class DynamicSingleOp {
rtStream_t stream_ = nullptr;
size_t num_inputs_ = 0;
size_t num_outputs_ = 0;
uint64_t aicpu_session_id_ = 0;
};
} // namespace ge
#endif // GE_SINGLE_OP_SINGLE_OP_H_

+ 17
- 15
ge/single_op/single_op_model.cc View File

@@ -32,7 +32,7 @@
#include "task/aicpu_kernel_task_builder.h"
#include "task/tbe_task_builder.h"

static std::atomic<std::uint64_t> aicpu_sessionid(0);
static std::atomic<std::uint64_t> aicpu_kernel_id(0);

using domi::TaskDef;
using std::unique_ptr;
@@ -252,7 +252,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
auto ret = BuildCpuKernelTask(task_def.kernel(), &task);
uint64_t singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id);
auto ret = BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id);
if (ret != SUCCESS) {
return ret;
}
@@ -265,14 +267,13 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) {
GELOGD("Building AICPU_TF task");
AiCpuTask *aicpu_task = nullptr;
bool depend_compute_flag = false;
uint64_t singleop_sessionid = aicpu_sessionid++;
GELOGI("Build singleOp, sessionId = %lu", singleop_sessionid);
auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_sessionid);
uint64_t singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id);
auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id);
if (ret != SUCCESS) {
return ret;
}
single_op.tasks_.emplace_back(aicpu_task);
single_op.SetSessionID(singleop_sessionid);
} else {
// skip
GELOGD("Skip task type: %d", static_cast<int>(task_type));
@@ -329,7 +330,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa
}

Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) {
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) {
auto iter = op_list_.find(kernel_def.op_index());
if (iter == op_list_.end()) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index());
@@ -342,7 +343,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def);
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, session_id);
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "build aicpu_TF op task failed");
return ret;
@@ -353,7 +354,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC
return SUCCESS;
}

Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task) {
Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) {
const auto &context = kernel_def.context();
auto iter = op_list_.find(context.op_index());
if (iter == op_list_.end()) {
@@ -367,7 +368,7 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa
}

auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def);
auto ret = builder.BuildTask(*aicpucc_task);
auto ret = builder.BuildTask(*aicpucc_task, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "build aicpu_CC op task failed");
return ret;
@@ -396,7 +397,9 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
OpTask *task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task));
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id));
single_op.op_task_.reset(task);
} else {
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID,
@@ -430,10 +433,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GELOGD("Building AICPU_TF task");
AiCpuTask *aicpu_task = nullptr;
bool depend_compute_flag = false;
uint64_t dynamic_singleop_sessionid = aicpu_sessionid++;
GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid);
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++;
GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id);
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true,
depend_compute_flag, dynamic_singleop_sessionid));
depend_compute_flag, dynamic_singleop_kernel_id));
if (depend_compute_flag) {
if (i >= tasks.size() - 1) {
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "The copy task of the fourth operator was not found.");
@@ -444,7 +447,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex()));
}
single_op.op_task_.reset(aicpu_task);
single_op.SetSessionID(dynamic_singleop_sessionid);
} else {
// skip
GELOGD("Skip task type: %d", static_cast<int>(task_type));


+ 2
- 2
ge/single_op/single_op_model.h View File

@@ -69,8 +69,8 @@ class SingleOpModel {
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task);
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op);

static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);


+ 2
- 2
ge/single_op/task/aicpu_kernel_task_builder.cc View File

@@ -46,7 +46,7 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) {
return SUCCESS;
}

Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) {
auto ret = SetKernelArgs(task);
if (ret != SUCCESS) {
return ret;
@@ -76,7 +76,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) {
"task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.",
kernel_ext_info.size(), kernel_ext_info_size);

ret = task.SetExtInfoAndType(kernel_ext_info);
ret = task.SetExtInfoAndType(kernel_ext_info, kernel_id);
if (ret != SUCCESS) {
GELOGE(ret, "Init ext info failed.");
return ret;


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save