@@ -56,7 +56,7 @@ if (ENABLE_OPEN_SRC) | |||
set(GE_LIB_PATH ${GE_LIB_PATH}/${GE_SYS_ARCH}) | |||
set(STATIC_ACL_LIB ${GE_LIB_PATH}) | |||
find_module(slog libslog.so ${GE_LIB_PATH}) | |||
find_module(mmpa libmmpa.so ${GE_LIB_PATH}) | |||
find_module(static_mmpa libmmpa.a ${GE_LIB_PATH}) | |||
find_module(msprof libmsprof.so ${GE_LIB_PATH}) | |||
find_module(hccl libhccl.so ${GE_LIB_PATH}) | |||
find_module(adump_server libadump_server.a ${GE_LIB_PATH}) | |||
@@ -67,10 +67,10 @@ if (ENABLE_OPEN_SRC) | |||
find_module(ascend_hal_stub libascend_hal.so ${GE_LIB_PATH}) | |||
find_module(error_manager_static liberror_manager.a ${GE_LIB_PATH}) | |||
find_module(msprofiler libmsprofiler.a ${GE_LIB_PATH}) | |||
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||
#find_module(ascendcl_static libascendcl.a ${GE_LIB_PATH}) | |||
else() | |||
find_module(slog libslog.so ${ASCEND_ATC_DIR}) | |||
find_module(mmpa libmmpa.so ${ASCEND_ATC_DIR}) | |||
find_module(static_mmpa libmmpa.a ${ASCEND_ATC_DIR}) | |||
if(PLATFORM STREQUAL "train") | |||
find_module(msprof libmsprof.so ${ASCEND_DRIVER_COMMON_DIR}) | |||
find_module(hccl libhccl.so ${ASCEND_RUNTIME_DIR}) | |||
@@ -91,7 +91,7 @@ if (ENABLE_OPEN_SRC) | |||
find_module(error_manager liberror_manager.so ${ASCEND_ATC_DIR}) | |||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
if(PRODUCT STREQUAL "flr3") | |||
find_module(msprof libmsprof.so ${ASCEND_DRIVER_SHARE_DIR}) | |||
elseif(PRODUCT STREQUAL "flr1") | |||
@@ -114,7 +114,7 @@ if (ENABLE_OPEN_SRC) | |||
find_module(error_manager_static liberror_manager.a ${ASCEND_ACL_DIR}) | |||
find_module(msprofiler libmsprofiler.a ${ASCEND_ACL_DIR}) | |||
find_module(ascend_hal_stub libascend_hal.so ${ASCEND_DRIVER_DIR}/driver) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
#find_module(ascendcl_static libascendcl.a ${ASCEND_ACL_DIR}) | |||
else() | |||
message(FATAL_ERROR "PLATFORM param is invalid, should be train or inference, build terminated") | |||
endif() | |||
@@ -148,16 +148,23 @@ elseif (ENABLE_D OR ENABLE_ACL) | |||
# common libraries | |||
find_module(slog libslog.so ${ASCEND_MS_DRIVER_PATH}) | |||
find_module(error_manager liberror_manager.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
find_module(static_mmpa libmmpa.a ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
if (ENABLE_D) | |||
# training | |||
find_module(mmpa libmmpa.so ${ASCEND_MS_DRIVER_PATH}) | |||
find_module(runtime libruntime.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
find_module(register libregister.so ${ASCEND_MS_RUNTIME_PATH} ${ATLAS_MS_RUNTIME_PATH}) | |||
endif () | |||
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | |||
add_subdirectory(metadef) | |||
elseif(ENABLE_MS_TESTCASE) | |||
include(cmake/external_libs/protobuf_static.cmake) | |||
include(cmake/external_libs/securec.cmake) | |||
include(cmake/intf_pub_linux.cmake) | |||
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/metadef) | |||
add_subdirectory(metadef) | |||
else() | |||
set(METADEF_DIR ${CMAKE_CURRENT_LIST_DIR}/../metadef) | |||
set(PARSER_DIR ${CMAKE_CURRENT_LIST_DIR}/../parser) | |||
@@ -48,7 +48,7 @@ set_target_properties(ascend_protobuf_static_lib PROPERTIES | |||
add_library(ascend_protobuf_static INTERFACE) | |||
target_include_directories(ascend_protobuf_static INTERFACE ${PROTOBUF_STATIC_PKG_DIR}/include) | |||
target_link_libraries(ascend_protobuf_static INTERFACE ascend_protobuf_static_lib) | |||
if (ENABLE_D OR ENABLE_ACL) | |||
if (ENABLE_D OR ENABLE_ACL OR ENABLE_MS_TESTCASES) | |||
include_directories(${PROTOBUF_STATIC_PKG_DIR}/include) | |||
endif () | |||
@@ -1,4 +1,4 @@ | |||
if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
add_subdirectory(common) | |||
add_subdirectory(plugin/engine) | |||
add_subdirectory(graph/build/memory) | |||
@@ -600,7 +600,7 @@ set(INFER_SRC_LIST | |||
"analyzer/analyzer.cc" | |||
) | |||
if (NOT ENABLE_D AND NOT ENABLE_ACL) | |||
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | |||
############ libge_runner.so ############ | |||
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | |||
@@ -648,6 +648,7 @@ target_link_libraries(ge_runner | |||
ge_memory | |||
adump_server | |||
msprofiler | |||
static_mmpa | |||
-Wl,--no-as-needed | |||
graph | |||
ge_common | |||
@@ -655,7 +656,6 @@ target_link_libraries(ge_runner | |||
register | |||
c_sec | |||
slog | |||
mmpa | |||
msprof | |||
runtime | |||
resource | |||
@@ -712,6 +712,7 @@ target_include_directories(ge_compiler PRIVATE | |||
target_link_libraries(ge_compiler | |||
$<BUILD_INTERFACE:intf_pub> | |||
ge_memory | |||
static_mmpa | |||
-Wl,--no-as-needed | |||
graph | |||
ge_common | |||
@@ -720,7 +721,6 @@ target_link_libraries(ge_compiler | |||
c_sec | |||
error_manager | |||
slog | |||
mmpa | |||
runtime_compile | |||
resource | |||
-Wl,--as-needed | |||
@@ -770,6 +770,7 @@ target_link_libraries(opensrc_ascendcl PRIVATE | |||
ge_executor | |||
ge_common_static | |||
graph_static | |||
static_mmpa | |||
ascend_protobuf_static | |||
register_static | |||
error_manager_static | |||
@@ -779,11 +780,11 @@ target_link_libraries(opensrc_ascendcl PRIVATE | |||
-Wl,--no-as-needed | |||
c_sec | |||
runtime | |||
mmpa | |||
slog | |||
msprof | |||
ascend_hal_stub | |||
-Wl,--as-needed | |||
-lrt | |||
-ldl | |||
json | |||
) | |||
@@ -177,7 +177,7 @@ Session::Session(const std::map<string, string> &options) { | |||
// check init status | |||
sessionId_ = 0; | |||
if (!g_ge_initialized) { | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED); | |||
GELOGE(GE_CLI_GE_NOT_INITIALIZED, "GE is not initialized."); | |||
return; | |||
} | |||
// call Initialize | |||
@@ -105,6 +105,7 @@ target_include_directories(ge_common PRIVATE | |||
target_link_libraries(ge_common PRIVATE | |||
$<BUILD_INTERFACE:intf_pub> | |||
static_mmpa | |||
-Wl,--no-as-needed | |||
graph | |||
ascend_protobuf | |||
@@ -112,7 +113,6 @@ target_link_libraries(ge_common PRIVATE | |||
c_sec | |||
error_manager | |||
slog | |||
mmpa | |||
-Wl,--as-needed | |||
json | |||
-lrt | |||
@@ -210,7 +210,7 @@ target_link_libraries(ge_common PRIVATE | |||
c_sec | |||
error_manager | |||
slog | |||
mmpa | |||
static_mmpa | |||
-Wl,--as-needed | |||
json | |||
-lrt | |||
@@ -16,9 +16,7 @@ | |||
#include "common/auth/file_saver.h" | |||
#include <fcntl.h> | |||
#include <securec.h> | |||
#include <unistd.h> | |||
#include <cstdlib> | |||
#include <fstream> | |||
#include <vector> | |||
@@ -39,12 +37,12 @@ Status FileSaver::OpenFile(int32_t &fd, const std::string &file_path) { | |||
return FAILED; | |||
} | |||
char real_path[PATH_MAX] = {0}; | |||
GE_IF_BOOL_EXEC(realpath(file_path.c_str(), real_path) == nullptr, | |||
char real_path[MMPA_MAX_PATH] = {0}; | |||
GE_IF_BOOL_EXEC(mmRealPath(file_path.c_str(), real_path, MMPA_MAX_PATH) != EN_OK, | |||
GELOGI("File %s is not exist, it will be created.", file_path.c_str())); | |||
// Open file | |||
mode_t mode = S_IRUSR | S_IWUSR; | |||
fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | |||
mmMode_t mode = M_IRUSR | M_IWUSR; | |||
fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode); | |||
if (fd == EN_INVALID_PARAM || fd == EN_ERROR) { | |||
// -1: Failed to open file; - 2: Illegal parameter | |||
GELOGE(FAILED, "Open file failed. mmpa_errno = %d, %s", fd, strerror(errno)); | |||
@@ -194,7 +192,7 @@ Status FileSaver::SaveToBuffWithFileHeader(const ModelFileHeader &file_header, | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status FileSaver::CheckPath(const std::string &file_path) { | |||
// Determine file path length | |||
if (file_path.size() >= PATH_MAX) { | |||
if (file_path.size() >= MMPA_MAX_PATH) { | |||
GELOGE(FAILED, "Path is too long:%zu", file_path.size()); | |||
return FAILED; | |||
} | |||
@@ -25,7 +25,7 @@ void CustAICPUKernelStore::AddCustAICPUKernel(const CustAICPUKernelPtr &kernel) | |||
} | |||
void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr<ge::OpDesc> &op_desc) const { | |||
GELOGI("LoadCustAICPUKernelBinToOpDesc in"); | |||
GELOGD("LoadCustAICPUKernelBinToOpDesc in"); | |||
if (op_desc != nullptr) { | |||
auto kernel_bin = FindKernel(op_desc->GetName()); | |||
if (kernel_bin != nullptr) { | |||
@@ -34,6 +34,6 @@ void CustAICPUKernelStore::LoadCustAICPUKernelBinToOpDesc(const std::shared_ptr< | |||
GELOGI("Load cust aicpu kernel:%s, %zu", kernel_bin->GetName().c_str(), kernel_bin->GetBinDataSize()); | |||
} | |||
} | |||
GELOGI("LoadCustAICPUKernelBinToOpDesc success"); | |||
GELOGD("LoadCustAICPUKernelBinToOpDesc success"); | |||
} | |||
} // namespace ge |
@@ -16,9 +16,6 @@ | |||
#include "common/debug/memory_dumper.h" | |||
#include <fcntl.h> | |||
#include <unistd.h> | |||
#include <string> | |||
#include "framework/common/debug/log.h" | |||
@@ -138,26 +135,26 @@ int MemoryDumper::OpenFile(const char *filename) { | |||
} | |||
// Get the absolute path | |||
string real_path; | |||
char tmp_path[PATH_MAX] = {0}; | |||
char tmp_path[MMPA_MAX_PATH] = {0}; | |||
GE_IF_BOOL_EXEC( | |||
-1 != path_split_pos, string prefix_path = std::string(filename).substr(0, path_split_pos); | |||
string last_path = std::string(filename).substr(path_split_pos, strlen(filename) - 1); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(realpath(prefix_path.c_str(), tmp_path) == nullptr, return kInvalidFd, | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(prefix_path.length() >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(mmRealPath(prefix_path.c_str(), tmp_path, MMPA_MAX_PATH) != EN_OK, return kInvalidFd, | |||
"Dir %s does not exit.", prefix_path.c_str()); | |||
real_path = std::string(tmp_path) + last_path;) | |||
GE_IF_BOOL_EXEC( | |||
path_split_pos == -1 || path_split_pos == 0, | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= PATH_MAX, return kInvalidFd, "Prefix path is too long!"); | |||
GE_IF_BOOL_EXEC(realpath(filename, tmp_path) == nullptr, | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(filename) >= MMPA_MAX_PATH, return kInvalidFd, "Prefix path is too long!"); | |||
GE_IF_BOOL_EXEC(mmRealPath(filename, tmp_path, MMPA_MAX_PATH) != EN_OK, | |||
GELOGI("File %s does not exit, it will be created.", filename)); | |||
real_path = std::string(tmp_path);) | |||
// Open file, only the current user can read and write, to avoid malicious application access | |||
// Using the O_EXCL, if the file already exists,return failed to avoid privilege escalation vulnerability. | |||
mode_t mode = S_IRUSR | S_IWUSR; | |||
mmMode_t mode = M_IRUSR | M_IWUSR; | |||
int32_t fd = mmOpen2(real_path.c_str(), O_RDWR | O_CREAT | O_APPEND, mode); | |||
int32_t fd = mmOpen2(real_path.c_str(), M_RDWR | M_CREAT | O_TRUNC, mode); | |||
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | |||
GELOGE(kInvalidFd, "open file failed. errno = %d, %s", fd, strerror(errno)); | |||
return kInvalidFd; | |||
@@ -118,19 +118,19 @@ Status TransFormatFromNchwToFzC04(const TransArgs &args, TransResult &result) { | |||
// data overflow check totally | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
return INTERNAL_ERROR); | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
return INTERNAL_ERROR); | |||
auto t1 = h_o * w_o; | |||
auto t2 = n_o * c_o; | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2); | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
return INTERNAL_ERROR); | |||
int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
return INTERNAL_ERROR); | |||
int64_t dst_size = total_ele_cnt * size; | |||
if (dst_size == 0) { | |||
@@ -205,20 +205,20 @@ Status PaddingNC(const TransArgs &args, TransArgs &args_tmp, std::shared_ptr<uin | |||
// data overflow check | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(h_o, w_o), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", h_o, w_o); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", h_o, w_o); | |||
return INTERNAL_ERROR); | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(n_o, c_o), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", n_o, c_o); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", n_o, c_o); | |||
return INTERNAL_ERROR); | |||
auto t1 = h_o * w_o; | |||
auto t2 = n_o * c_o; | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", t1, t2); | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(t1, t2), GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%ld]", t1, t2); | |||
return INTERNAL_ERROR); | |||
int64_t total_ele_cnt = n_o * c_o * h_o * w_o; | |||
int size = GetSizeByDataType(args.src_data_type); | |||
GE_IF_BOOL_EXEC(!CheckInt64MulOverflow(total_ele_cnt, size), | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%lld], B[%lld]", total_ele_cnt, size); | |||
GELOGE(INTERNAL_ERROR, "int64 mul overflow.A[%ld], B[%d]", total_ele_cnt, size); | |||
return INTERNAL_ERROR); | |||
int64_t dst_size = total_ele_cnt * size; | |||
@@ -30,8 +30,10 @@ const uint8_t kPrefixIndex = 9; | |||
namespace ge { | |||
void OpTilingManager::ClearHandles() noexcept { | |||
for (const auto &handle : handles_) { | |||
if (dlclose(handle.second) != 0) { | |||
GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), dlerror()); | |||
if (mmDlclose(handle.second) != 0) { | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGE(FAILED, "Failed to close handle of %s: %s", handle.first.c_str(), error); | |||
} | |||
} | |||
handles_.clear(); | |||
@@ -40,11 +42,12 @@ void OpTilingManager::ClearHandles() noexcept { | |||
OpTilingManager::~OpTilingManager() { ClearHandles(); } | |||
std::string OpTilingManager::GetPath() { | |||
const char *opp_path_env = std::getenv(kEnvName); | |||
char opp_path_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(kEnvName, opp_path_env, MMPA_MAX_PATH); | |||
std::string opp_path = kDefaultPath; | |||
if (opp_path_env != nullptr) { | |||
char resolved_path[PATH_MAX]; | |||
if (realpath(opp_path_env, resolved_path) == NULL) { | |||
if (res == EN_OK) { | |||
char resolved_path[MMPA_MAX_PATH]; | |||
if (mmRealPath(opp_path_env, resolved_path, MMPA_MAX_PATH) != EN_OK) { | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19024", {"env", "value", "situation"}, {"ASCEND_OPP_PATH", opp_path_env, "loading the tiling lib"}); | |||
GELOGE(PARAM_INVALID, "Failed load tiling lib as env 'ASCEND_OPP_PATH'[%s] is invalid path.", opp_path_env); | |||
@@ -66,16 +69,20 @@ void OpTilingManager::LoadSo() { | |||
std::string built_in_name = kDefaultBuiltInTilingPath.substr(kPrefixIndex); | |||
std::string custom_name = kDefaultCustomTilingPath.substr(kPrefixIndex); | |||
void *handle_bi = dlopen(built_in_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
void *handle_bi = mmDlopen(built_in_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
if (handle_bi == nullptr) { | |||
GELOGW("Failed to dlopen %s!", dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to dlopen %s!", error); | |||
} else { | |||
handles_[built_in_name] = handle_bi; | |||
} | |||
void *handle_ct = dlopen(custom_tiling_lib.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
void *handle_ct = mmDlopen(custom_tiling_lib.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
if (handle_ct == nullptr) { | |||
GELOGW("Failed to dlopen %s!", dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to dlopen %s!", error); | |||
} else { | |||
handles_[custom_name] = handle_ct; | |||
} | |||
@@ -16,9 +16,7 @@ | |||
#include "common/ge/plugin_manager.h" | |||
#include <dirent.h> | |||
#include <sys/stat.h> | |||
#include <unistd.h> | |||
#include <algorithm> | |||
#include <cstring> | |||
#include <fstream> | |||
@@ -38,8 +36,10 @@ const char *const kExt = ".so"; // supported extension of shared obje | |||
namespace ge { | |||
void PluginManager::ClearHandles_() noexcept { | |||
for (const auto &handle : handles_) { | |||
if (dlclose(handle.second) != 0) { | |||
GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), dlerror()); | |||
if (mmDlclose(handle.second) != 0) { | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to close handle of %s: %s", handle.first.c_str(), error); | |||
} | |||
} | |||
handles_.clear(); | |||
@@ -48,18 +48,18 @@ void PluginManager::ClearHandles_() noexcept { | |||
PluginManager::~PluginManager() { ClearHandles_(); } | |||
string PluginManager::GetPath() { | |||
Dl_info dl_info; | |||
if (dladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) == 0) { | |||
mmDlInfo dl_info; | |||
if (mmDladdr(reinterpret_cast<void *>(&PluginManager::GetPath), &dl_info) != EN_OK) { | |||
GELOGW("Failed to read the shared library file path!"); | |||
return string(); | |||
} else { | |||
std::string so_path = dl_info.dli_fname; | |||
char path[PATH_MAX] = {0}; | |||
if (so_path.length() >= PATH_MAX) { | |||
char path[MMPA_MAX_PATH] = {0}; | |||
if (so_path.length() >= MMPA_MAX_PATH) { | |||
GELOGW("The shared library file path is too long!"); | |||
return string(); | |||
} | |||
if (realpath(so_path.c_str(), path) == nullptr) { | |||
if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) { | |||
GELOGW("Failed to get realpath of %s", so_path.c_str()); | |||
return string(); | |||
} | |||
@@ -93,7 +93,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
std::vector<std::string> path_vec; | |||
SplitPath(path, path_vec); | |||
for (const auto &single_path : path_vec) { | |||
GE_IF_BOOL_EXEC(single_path.length() >= PATH_MAX, GELOGE(GE_PLGMGR_PATH_INVALID, | |||
GE_IF_BOOL_EXEC(single_path.length() >= MMPA_MAX_PATH, GELOGE(GE_PLGMGR_PATH_INVALID, | |||
"The shared library file path is too long!"); | |||
continue); | |||
// load break when number of loaded so reach maximum | |||
@@ -119,16 +119,18 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
GELOGI("dlopen the shared library path name: %s.", file_path_dlopen.c_str()); | |||
// load continue when dlopen is failed | |||
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
if (handle == nullptr) { | |||
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGE(GE_PLGMGR_PATH_INVALID, "Failed to dlopen %s!", error); | |||
continue; | |||
} | |||
// load continue when so is invalid | |||
bool is_valid = true; | |||
for (const auto &func_name : func_check_list) { | |||
auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | |||
auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGE(GE_PLGMGR_PATH_INVALID, "%s is skipped since function %s is not existed!", func_name.c_str(), | |||
func_name.c_str()); | |||
@@ -137,7 +139,7 @@ Status PluginManager::LoadSo(const string &path, const vector<string> &func_chec | |||
} | |||
} | |||
if (!is_valid) { | |||
GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||
GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose."); | |||
continue; | |||
} | |||
@@ -197,22 +199,29 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||
so_list_.clear(); | |||
ClearHandles_(); | |||
char canonical_path[PATH_MAX] = {0}; | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= PATH_MAX, GELOGW("File path is too long!"); | |||
char canonical_path[MMPA_MAX_PATH] = {0}; | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path.length() >= MMPA_MAX_PATH, GELOGW("File path is too long!"); | |||
return FAILED, "File path is too long!"); | |||
if (realpath(path.c_str(), canonical_path) == nullptr) { | |||
if (mmRealPath(path.c_str(), canonical_path, MMPA_MAX_PATH) != EN_OK) { | |||
GELOGW("Failed to get realpath of %s", path.c_str()); | |||
return SUCCESS; | |||
} | |||
DIR *dir = opendir(canonical_path); | |||
if (dir == nullptr) { | |||
GELOGW("Invalid path for load: %s", path.c_str()); | |||
return SUCCESS; | |||
INT32 is_dir = mmIsDir(canonical_path); | |||
// Lib plugin path not exist | |||
if (is_dir != EN_OK) { | |||
GELOGW("Invalid path for load: %s", path.c_str()); | |||
return SUCCESS; | |||
} | |||
struct dirent *entry = nullptr; | |||
while ((entry = readdir(dir)) != nullptr) { | |||
mmDirent **entries = nullptr; | |||
auto ret = mmScandir(canonical_path, &entries, nullptr, nullptr); | |||
if (ret < EN_OK) { | |||
GELOGW("scan dir failed. path = %s, ret = %d", canonical_path, ret); | |||
return FAILED; | |||
} | |||
for (int i = 0; i < ret; ++i) { | |||
mmDirent *entry = entries[i]; | |||
// read fileName and fileType | |||
std::string file_name = entry->d_name; | |||
unsigned char file_type = entry->d_type; | |||
@@ -250,9 +259,11 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||
GELOGI("Dlopen so path name: %s. ", file_path_dlopen.c_str()); | |||
// load continue when dlopen is failed | |||
auto handle = dlopen(file_path_dlopen.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
auto handle = mmDlopen(file_path_dlopen.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
if (handle == nullptr) { | |||
GELOGW("Failed in dlopen %s!", dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed in dlopen %s!", error); | |||
continue; | |||
} | |||
@@ -261,7 +272,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||
// load continue when so is invalid | |||
bool is_valid = true; | |||
for (const auto &func_name : func_check_list) { | |||
auto real_fn = (void (*)())dlsym(handle, func_name.c_str()); | |||
auto real_fn = (void (*)())mmDlsym(handle, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("The %s is skipped since function %s is not existed!", file_name.c_str(), func_name.c_str()); | |||
is_valid = false; | |||
@@ -269,7 +280,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||
} | |||
} | |||
if (!is_valid) { | |||
GE_LOGE_IF(dlclose(handle), "Failed to dlclose."); | |||
GE_LOGE_IF(mmDlclose(handle), "Failed to dlclose."); | |||
continue; | |||
} | |||
@@ -279,7 +290,7 @@ Status PluginManager::Load(const string &path, const vector<string> &func_check_ | |||
handles_[string(file_name)] = handle; | |||
num_of_loaded_so++; | |||
} | |||
closedir(dir); | |||
mmScandirFree(entries, ret); | |||
if (num_of_loaded_so == 0) { | |||
GELOGW("No loadable shared library found in the path: %s", path.c_str()); | |||
return SUCCESS; | |||
@@ -17,7 +17,6 @@ | |||
#ifndef GE_COMMON_GE_PLUGIN_MANAGER_H_ | |||
#define GE_COMMON_GE_PLUGIN_MANAGER_H_ | |||
#include <dlfcn.h> | |||
#include <functional> | |||
#include <iostream> | |||
#include <map> | |||
@@ -30,6 +29,7 @@ | |||
#include "common/ge_inner_error_codes.h" | |||
#include "engine/dnnengine.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "mmpa/mmpa_api.h" | |||
namespace ge { | |||
using SoToHandleMap = std::map<std::string, void *>; | |||
@@ -57,7 +57,7 @@ class PluginManager { | |||
template <typename R, typename... Types> | |||
Status GetAllFunctions(const string &func_name, map<string, function<R(Types... args)>> &funcs) { | |||
for (const auto &handle : handles_) { | |||
auto real_fn = (R(*)(Types...))dlsym(handle.second, func_name.c_str()); | |||
auto real_fn = (R(*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("Failed to get function %s in %s!", func_name.c_str(), handle.first.c_str()); | |||
return GE_PLGMGR_FUNC_NOT_EXIST; | |||
@@ -72,7 +72,7 @@ class PluginManager { | |||
Status InvokeAll(const string &func_name, Types... args) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (void (*)(Types...))dlsym(handle.second, func_name.c_str()); | |||
auto real_fn = (void (*)(Types...))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | |||
return GE_PLGMGR_INVOKE_FAILED; | |||
@@ -87,7 +87,7 @@ class PluginManager { | |||
Status InvokeAll(const string &func_name, T arg) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (void (*)(T))dlsym(handle.second, func_name.c_str()); | |||
auto real_fn = (void (*)(T))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | |||
return GE_PLGMGR_INVOKE_FAILED; | |||
@@ -112,7 +112,7 @@ class PluginManager { | |||
Status InvokeAll(const string &func_name, T1 arg) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (T2(*)(T1))dlsym(handle.second, func_name.c_str()); | |||
auto real_fn = (T2(*)(T1))mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | |||
return GE_PLGMGR_INVOKE_FAILED; | |||
@@ -130,7 +130,7 @@ class PluginManager { | |||
Status InvokeAll(const string &func_name) { | |||
for (const auto &handle : handles_) { | |||
// If the funcName is existed, signature of realFn can be casted to any type | |||
auto real_fn = (T(*)())dlsym(handle.second, func_name.c_str()); | |||
auto real_fn = (T(*)())mmDlsym(handle.second, const_cast<char *>(func_name.c_str())); | |||
if (real_fn == nullptr) { | |||
GELOGW("Failed to invoke function %s in %s!", func_name.c_str(), handle.first.c_str()); | |||
return GE_PLGMGR_INVOKE_FAILED; | |||
@@ -16,8 +16,6 @@ | |||
#include "common/ge/tbe_plugin_manager.h" | |||
#include <dirent.h> | |||
#include <unistd.h> | |||
#include <algorithm> | |||
#include <cstring> | |||
#include <fstream> | |||
@@ -50,9 +48,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY TBEPluginManager &TBEPluginMana | |||
Status TBEPluginManager::ClearHandles_() { | |||
Status ret = SUCCESS; | |||
for (const auto &handle : handles_vec_) { | |||
if (dlclose(handle) != 0) { | |||
if (mmDlclose(handle) != 0) { | |||
ret = FAILED; | |||
GELOGW("Failed to close handle: %s", dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to close handle: %s", error); | |||
} | |||
} | |||
handles_vec_.clear(); | |||
@@ -65,18 +65,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status TBEPluginManager::Finali | |||
} | |||
string TBEPluginManager::GetPath() { | |||
Dl_info dl_info; | |||
if (dladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) == 0) { | |||
mmDlInfo dl_info; | |||
if (mmDladdr(reinterpret_cast<void *>(&TBEPluginManager::GetPath), &dl_info) != EN_OK) { | |||
GELOGW("Failed to read so path!"); | |||
return string(); | |||
} else { | |||
string so_path = dl_info.dli_fname; | |||
char path[PATH_MAX] = {0}; | |||
if (so_path.length() >= PATH_MAX) { | |||
char path[MMPA_MAX_PATH] = {0}; | |||
if (so_path.length() >= MMPA_MAX_PATH) { | |||
GELOGW("File path is too long!"); | |||
return string(); | |||
} | |||
if (realpath(so_path.c_str(), path) == nullptr) { | |||
if (mmRealPath(so_path.c_str(), path, MMPA_MAX_PATH) != EN_OK) { | |||
GELOGW("Failed to get realpath of %s", so_path.c_str()); | |||
return string(); | |||
} | |||
@@ -108,35 +108,36 @@ void TBEPluginManager::FindParserSo(const string &path, vector<string> &file_lis | |||
GELOGW("RealPath is empty."); | |||
return; | |||
} | |||
struct stat stat_buf; | |||
if ((stat(real_path.c_str(), &stat_buf) != 0) || (!S_ISDIR(stat_buf.st_mode))) { | |||
GELOGW("%s is not a dir.", real_path.c_str()); | |||
return; | |||
} | |||
struct dirent *dent(0); | |||
DIR *dir = opendir(real_path.c_str()); | |||
// Plugin path does not exist | |||
if (dir == nullptr) { | |||
GELOGW("Open directory %s failed.", real_path.c_str()); | |||
return; | |||
INT32 is_dir = mmIsDir(real_path.c_str()); | |||
// Lib plugin path not exist | |||
if (is_dir != EN_OK) { | |||
GELOGW("%s is not a dir.", real_path.c_str()); | |||
return; | |||
} | |||
while ((dent = readdir(dir)) != nullptr) { | |||
if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; | |||
string name = dent->d_name; | |||
string full_name = real_path + "/" + name; | |||
const string so_suff = ".so"; | |||
const string caffe_parser_so_suff = "lib_caffe_parser.so"; | |||
const string aicpu_so_suff = "_aicpu.so"; | |||
const string aicpu_host_so_suff = "_online.so"; | |||
if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { | |||
ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, | |||
aicpu_host_so_suff); | |||
} else { | |||
FindParserSo(full_name, file_list, caffe_parser_path); | |||
} | |||
mmDirent **entries = nullptr; | |||
auto ret = mmScandir(real_path.c_str(), &entries, nullptr, nullptr); | |||
if (ret < EN_OK) { | |||
GELOGW("scan dir failed. path = %s, ret = %d", real_path.c_str(), ret); | |||
return; | |||
} | |||
for (int i = 0; i < ret; ++i) { | |||
mmDirent *dent = entries[i]; | |||
if (strcmp(dent->d_name, ".") == 0 || strcmp(dent->d_name, "..") == 0) continue; | |||
string name = dent->d_name; | |||
string full_name = real_path + "/" + name; | |||
const string so_suff = ".so"; | |||
const string caffe_parser_so_suff = "lib_caffe_parser.so"; | |||
const string aicpu_so_suff = "_aicpu.so"; | |||
const string aicpu_host_so_suff = "_online.so"; | |||
if (name.size() >= so_suff.size() && name.compare(name.size() - so_suff.size(), so_suff.size(), so_suff) == 0) { | |||
ProcessSoFullName(file_list, caffe_parser_path, full_name, caffe_parser_so_suff, aicpu_so_suff, | |||
aicpu_host_so_suff); | |||
} else { | |||
FindParserSo(full_name, file_list, caffe_parser_path); | |||
} | |||
} | |||
closedir(dir); | |||
mmScandirFree(entries, ret); | |||
} | |||
void TBEPluginManager::GetPluginSoFileList(const string &path, vector<string> &file_list, string &caffe_parser_path) { | |||
@@ -159,8 +160,9 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||
fmk_type = ge::TypeUtils::FmkTypeToSerialString(type); | |||
GELOGI("Framework type is %s.", fmk_type.c_str()); | |||
const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
if (path_env != nullptr) { | |||
char path_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv("ASCEND_OPP_PATH", path_env, MMPA_MAX_PATH); | |||
if (res == EN_OK) { | |||
std::string path = path_env; | |||
customop_path = (path + "/framework/custom" + "/:") + (path + "/framework/built-in/" + fmk_type); | |||
GELOGI("Get custom so path from env : %s", path_env); | |||
@@ -210,9 +212,11 @@ void TBEPluginManager::LoadPluginSo(const std::map<string, string> &options) { | |||
for (auto elem : file_list) { | |||
StringUtils::Trim(elem); | |||
void *handle = dlopen(elem.c_str(), RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE); | |||
void *handle = mmDlopen(elem.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL | MMPA_RTLD_NODELETE); | |||
if (handle == nullptr) { | |||
GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), dlerror()); | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("dlopen failed, plugin name:%s. Message(%s).", elem.c_str(), error); | |||
} else if (find(handles_vec_.begin(), handles_vec_.end(), handle) == handles_vec_.end()) { | |||
// The handle is closed when the program exits, not here | |||
GELOGI("Plugin load %s success.", elem.c_str()); | |||
@@ -17,7 +17,6 @@ | |||
#ifndef GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | |||
#define GE_COMMON_GE_TBE_PLUGIN_MANAGER_H_ | |||
#include <dlfcn.h> | |||
#include <functional> | |||
#include <iostream> | |||
#include <map> | |||
@@ -110,11 +110,12 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libascend_protobuf \ | |||
libc_sec \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
liberror_manager \ | |||
LOCAL_STATIC_LIBRARIES += libmmpa | |||
LOCAL_LDFLAGS := -lrt -ldl | |||
include $(BUILD_HOST_SHARED_LIBRARY) | |||
@@ -152,11 +153,12 @@ LOCAL_SHARED_LIBRARIES := \ | |||
libascend_protobuf \ | |||
libc_sec \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
liberror_manager \ | |||
LOCAL_STATIC_LIBRARIES += libmmpa | |||
ifeq ($(device_os),android) | |||
LOCAL_LDFLAGS += -ldl | |||
LOCAL_LDLIBS += -L$(PWD)/prebuilts/clang/linux-x86/aarch64/android-ndk-r21/sysroot/usr/lib/aarch64-linux-android/29 -llog | |||
@@ -14,8 +14,6 @@ | |||
* limitations under the License. | |||
*/ | |||
#include <fcntl.h> | |||
#include <unistd.h> | |||
#include <climits> | |||
#include <cstdio> | |||
#include <fstream> | |||
@@ -448,12 +446,12 @@ Status ModelCacheHelper::SaveJsonToFile(const string &file_name, const Json &jso | |||
} | |||
const string path = cache_path_ + file_name; | |||
const int FILE_AUTHORITY = 0600; | |||
int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, FILE_AUTHORITY); | |||
int fd = mmOpen2(path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, FILE_AUTHORITY); | |||
if (fd < 0) { | |||
GELOGW("Fail to open the file: %s.", path.c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
if (close(fd) != 0) { | |||
if (mmClose(fd) != 0) { | |||
GELOGW("Fail to close the file: %s.", path.c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
@@ -98,7 +98,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
ge::Buffer model_buffer; | |||
(void)model_tmp->Save(model_buffer); | |||
GELOGI("MODEL_DEF size is %zu", model_buffer.GetSize()); | |||
GELOGD("MODEL_DEF size is %zu", model_buffer.GetSize()); | |||
if (model_buffer.GetSize() > 0) { | |||
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::MODEL_DEF, model_buffer.GetData(), | |||
model_buffer.GetSize()) != SUCCESS) { | |||
@@ -107,7 +107,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
} | |||
} | |||
auto ge_model_weight = ge_model->GetWeight(); | |||
GELOGI("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | |||
GELOGD("WEIGHTS_DATA size is %zu, %p", ge_model_weight.GetSize(), ge_model_weight.GetData()); | |||
// weight is not necessary | |||
if (ge_model_weight.GetSize() > 0) { | |||
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | |||
@@ -117,7 +117,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
} | |||
TBEKernelStore tbe_kernel_store = ge_model->GetTBEKernelStore(); | |||
GELOGI("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); | |||
GELOGD("TBE_KERNELS size is %zu", tbe_kernel_store.DataSize()); | |||
if (tbe_kernel_store.DataSize() > 0) { | |||
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | |||
ModelPartitionType::TBE_KERNELS, | |||
@@ -129,7 +129,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
(void)tbe_kernel_store.Load(tbe_kernel_store.Data(), tbe_kernel_store.DataSize()); | |||
CustAICPUKernelStore cust_aicpu_kernel_store = ge_model->GetCustAICPUKernelStore(); | |||
GELOGI("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); | |||
GELOGD("cust aicpu kernels size is %zu", cust_aicpu_kernel_store.DataSize()); | |||
if (cust_aicpu_kernel_store.DataSize() > 0) { | |||
GE_CHK_STATUS_RET(SaveModelPartition(om_file_save_helper, | |||
ModelPartitionType::CUST_AICPU_KERNELS, | |||
@@ -155,8 +155,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
} | |||
(void)model_task_def->SerializePartialToArray(task_buffer.GetData(), static_cast<int>(partition_task_size)); | |||
GELOGI("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num()); | |||
GELOGI("TASK_INFO size is %zu", partition_task_size); | |||
GELOGD("TASK_INFO op_size:%d, stream_num:%u", model_task_def->op().size(), model_task_def->stream_num()); | |||
GELOGD("TASK_INFO size is %zu", partition_task_size); | |||
if (SaveModelPartition(om_file_save_helper, ModelPartitionType::TASK_INFO, task_buffer.GetData(), | |||
partition_task_size) != SUCCESS) { | |||
@@ -168,7 +168,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
model_header.platform_type = ge_model->GetPlatformType(); | |||
model_header.om_ir_version = ge_model->GetVersion(); | |||
std::string platform_version = ge_model->GetPlatformVersion(); | |||
GELOGI("Platform version save: %s", platform_version.c_str()); | |||
errno_t err; | |||
err = memcpy_s(model_header.platform_version, PLATFORM_VERSION_LEN, platform_version.c_str(), | |||
@@ -178,7 +177,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
return MEMALLOC_FAILED; | |||
} | |||
string version = reinterpret_cast<char *>(model_header.platform_version); | |||
GELOGI("Platform version save: %s", version.c_str()); | |||
GELOGD("Platform version save: %s", version.c_str()); | |||
size_t name_size = ge_model->GetName().size(); | |||
name_size = name_size > (MODEL_NAME_LENGTH - 1) ? (MODEL_NAME_LENGTH - 1) : name_size; | |||
@@ -188,7 +187,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::SaveToOmMod | |||
return MEMALLOC_FAILED; | |||
} | |||
string model_name = reinterpret_cast<char *>(model_header.name); | |||
GELOGI("Model name save:%s", model_name.c_str()); | |||
GELOGD("Model name save:%s", model_name.c_str()); | |||
Status ret = om_file_save_helper->SaveModel(save_param, output_file.c_str(), model, is_offline_); | |||
if (ret != SUCCESS) { | |||
@@ -346,7 +345,7 @@ Status ModelHelper::LoadModelData(OmFileLoadHelper &om_load_helper) { | |||
ModelPartition partition_model_def; | |||
// no need to check value, DATA->NetOutput | |||
om_load_helper.GetModelPartition(ModelPartitionType::MODEL_DEF, partition_model_def); | |||
GELOGI("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); | |||
GELOGD("Model_def partition addr:%p,size:%u", partition_model_def.data, partition_model_def.size); | |||
ge::Model model; | |||
if (ge::Model::Load(partition_model_def.data, partition_model_def.size, model) != SUCCESS) { | |||
@@ -376,7 +375,7 @@ Status ModelHelper::LoadWeights(OmFileLoadHelper &om_load_helper) { | |||
ge::Buffer weight = ge::Buffer::CopyFrom(partition.data, partition.size); | |||
model_->SetWeight(weight); | |||
GELOGI("GetWeight size:%u", partition.size); | |||
GELOGD("GetWeight size:%u", partition.size); | |||
return SUCCESS; | |||
} | |||
@@ -393,7 +392,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om | |||
GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed."); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGI("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num()); | |||
GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num()); | |||
} | |||
model_->SetModelTaskDef(task); | |||
return SUCCESS; | |||
@@ -404,9 +403,9 @@ Status ModelHelper::LoadTBEKernelStore(OmFileLoadHelper &om_load_helper) { | |||
ModelPartition partition_kernel_def; | |||
TBEKernelStore kernel_store; | |||
if (om_load_helper.GetModelPartition(ModelPartitionType::TBE_KERNELS, partition_kernel_def) == SUCCESS) { | |||
GELOGI("Kernels partition size:%u", partition_kernel_def.size); | |||
GELOGD("Kernels partition size:%u", partition_kernel_def.size); | |||
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | |||
GELOGI("Load tbe kernels success"); | |||
GELOGD("Load tbe kernels success"); | |||
} else { | |||
GELOGW("Load tbe kernels failed"); | |||
} | |||
@@ -420,11 +419,9 @@ Status ModelHelper::LoadCustAICPUKernelStore(OmFileLoadHelper &om_load_helper) { | |||
ModelPartition partition_kernel_def; | |||
CustAICPUKernelStore kernel_store; | |||
if (om_load_helper.GetModelPartition(ModelPartitionType::CUST_AICPU_KERNELS, partition_kernel_def) == SUCCESS) { | |||
GELOGI("Kernels partition size:%u", partition_kernel_def.size); | |||
GELOGD("Kernels partition size:%u", partition_kernel_def.size); | |||
if (kernel_store.Load(partition_kernel_def.data, partition_kernel_def.size)) { | |||
GELOGI("Load cust aicpu kernels success"); | |||
} else { | |||
GELOGW("Load cust aicpu kernels failed"); | |||
} | |||
} | |||
model_->SetCustAICPUKernelStore(kernel_store); | |||
@@ -123,7 +123,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||
return ACL_ERROR_GE_EXEC_MODEL_PARTITION_NUM_INVALID; | |||
} | |||
size_t mem_offset = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table); | |||
GELOGI("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
GELOGD("ModelPartitionTable num :%u, ModelFileHeader length :%zu, ModelPartitionTable length :%zu", | |||
partition_table->num, sizeof(ModelFileHeader), mem_offset); | |||
if (model_data_size <= mem_offset) { | |||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID, "invalid model data, partition_table->num:%u, model data size %u", | |||
@@ -143,7 +143,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, const uint | |||
return ACL_ERROR_GE_EXEC_MODEL_DATA_SIZE_INVALID; | |||
} | |||
mem_offset += partition.size; | |||
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||
GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -167,7 +167,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelPartitionTable *OmFileSave | |||
ModelPartition partition = context_.partition_datas_[i]; | |||
partition_table->partition[i] = {partition.type, mem_offset, partition.size}; | |||
mem_offset += partition.size; | |||
GELOGI("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||
GELOGD("Partition, type:%d, size:%u", static_cast<int>(partition.type), partition.size); | |||
} | |||
return partition_table; | |||
} | |||
@@ -191,7 +191,7 @@ Status OmFileSaveHelper::SaveModel(const SaveParam &save_param, const char *outp | |||
(void)save_param.pri_key_file; | |||
Status ret = SaveModelToFile(output_file, model, is_offline); | |||
if (ret == SUCCESS) { | |||
GELOGI("Generate model with encrypt."); | |||
GELOGD("Generate model with encrypt."); | |||
} | |||
return ret; | |||
} | |||
@@ -213,7 +213,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat | |||
FMK_UINT32_ADDCHECK(size_of_table, model_data_len) | |||
model_header_.length = size_of_table + model_data_len; | |||
GELOGI("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu", | |||
GELOGD("Sizeof(ModelFileHeader):%zu,sizeof(ModelPartitionTable):%u, model_data_len:%u, model_total_len:%zu", | |||
sizeof(ModelFileHeader), size_of_table, model_data_len, model_header_.length + sizeof(ModelFileHeader)); | |||
std::vector<ModelPartition> partition_datas = context_.partition_datas_; | |||
@@ -224,7 +224,7 @@ Status OmFileSaveHelper::SaveModelToFile(const char *output_file, ModelBufferDat | |||
ret = FileSaver::SaveToBuffWithFileHeader(model_header_, *partition_table, partition_datas, model); | |||
} | |||
if (ret == SUCCESS) { | |||
GELOGI("Save model success without encrypt."); | |||
GELOGD("Save model success without encrypt."); | |||
} | |||
return ret; | |||
#else | |||
@@ -51,7 +51,7 @@ bool KernelStore::Build() { | |||
kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length()); | |||
kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize()); | |||
GELOGI("get kernel bin name %s, addr %p, size %u", | |||
GELOGD("get kernel bin name %s, addr %p, size %u", | |||
kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize()); | |||
mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head)); | |||
GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false); | |||
@@ -95,7 +95,7 @@ bool KernelStore::Load(const uint8_t *data, const size_t &len) { | |||
std::string name(next_buffer, kernel_head->name_len); | |||
next_buffer += kernel_head->name_len; | |||
GELOGI("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len); | |||
GELOGD("Load kernel from om:%s,%u,%u", name.c_str(), kernel_head->name_len, kernel_head->bin_len); | |||
std::vector<char> kernel_bin(next_buffer, next_buffer + kernel_head->bin_len); | |||
KernelBinPtr teb_kernel_ptr = ge::MakeShared<KernelBin>(name, std::move(kernel_bin)); | |||
if (teb_kernel_ptr != nullptr) { | |||
@@ -17,7 +17,6 @@ | |||
#include "common/model_parser/base.h" | |||
#include "common/helper/model_helper.h" | |||
#include <securec.h> | |||
#include <sys/sysinfo.h> | |||
#include <fstream> | |||
#include <memory> | |||
#include <string> | |||
@@ -107,7 +106,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::ParseMo | |||
model_data = data; | |||
model_len = file_header->length; | |||
GELOGI("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | |||
GELOGD("Model_len is %u, model_file_head_len is %zu.", model_len, sizeof(ModelFileHeader)); | |||
} else { | |||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION, "Invalid model. ModelEncryptType not supported."); | |||
res = ACL_ERROR_GE_EXEC_MODEL_NOT_SUPPORT_ENCRYPTION; | |||
@@ -16,9 +16,7 @@ | |||
#include "common/model_saver.h" | |||
#include <fcntl.h> | |||
#include <securec.h> | |||
#include <unistd.h> | |||
#include <cstdlib> | |||
#include <fstream> | |||
#include <string> | |||
@@ -51,14 +49,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||
return FAILED; | |||
} | |||
char real_path[PATH_MAX] = {0}; | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= PATH_MAX, return FAILED, "file path is too long!"); | |||
GE_IF_BOOL_EXEC(realpath(file_path, real_path) == nullptr, | |||
char real_path[MMPA_MAX_PATH] = {0}; | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path) >= MMPA_MAX_PATH, return FAILED, "file path is too long!"); | |||
GE_IF_BOOL_EXEC(mmRealPath(file_path, real_path, MMPA_MAX_PATH) != EN_OK, | |||
GELOGI("File %s does not exit, it will be created.", file_path)); | |||
// Open file | |||
mode_t mode = S_IRUSR | S_IWUSR; | |||
int32_t fd = mmOpen2(real_path, O_RDWR | O_CREAT | O_TRUNC, mode); | |||
mmMode_t mode = M_IRUSR | M_IWUSR; | |||
int32_t fd = mmOpen2(real_path, M_RDWR | M_CREAT | O_TRUNC, mode); | |||
if (fd == EN_ERROR || fd == EN_INVALID_PARAM) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19001", {"file", "errmsg"}, {file_path, strerror(errno)}); | |||
GELOGE(FAILED, "Open file[%s] failed. %s", file_path, strerror(errno)); | |||
@@ -72,7 +70,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelSaver::SaveJsonToFi | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19004", {"file", "errmsg"}, {file_path, strerror(errno)}); | |||
// Need to both print the error info of mmWrite and mmClose, so return ret after mmClose | |||
GELOGE(FAILED, "Write to file failed. errno = %d, %s", mmpa_ret, strerror(errno)); | |||
GELOGE(FAILED, "Write to file failed. errno = %ld, %s", mmpa_ret, strerror(errno)); | |||
ret = FAILED; | |||
} | |||
// Close file | |||
@@ -214,8 +214,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::Pa | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::InitFromOptions(const Options &options) { | |||
#ifdef DAVINCI_SUPPORT_PROFILING | |||
// enable profiling support two ways: env and front end | |||
const char *profiling_mode = std::getenv("PROFILING_MODE"); | |||
const char *prof_options = std::getenv("PROFILING_OPTIONS"); | |||
char profiling_mode_temp[MMPA_MAX_PATH] = { 0x00 }; | |||
char prof_options_temp[MMPA_MAX_PATH] = { 0x00 }; | |||
(void)mmGetEnv("PROFILING_MODE", profiling_mode_temp, MMPA_MAX_PATH); | |||
(void)mmGetEnv("PROFILING_OPTIONS", prof_options_temp, MMPA_MAX_PATH ); | |||
const char *profiling_mode = profiling_mode_temp; | |||
const char *prof_options = prof_options_temp; | |||
if ((profiling_mode == nullptr) || (strcmp("true", profiling_mode) != 0) || (prof_options == nullptr)) { | |||
is_load_profiling_ = false; | |||
is_execute_profiling_ = false; | |||
@@ -554,7 +558,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
GELOGE(rt_ret, "runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | |||
return; | |||
} | |||
GELOGI("current logic_device_id:%d", logic_device_id); | |||
GELOGD("current logic_device_id:%d", logic_device_id); | |||
if (check_device) { | |||
auto ret = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||
if (ret == device_id_.end()) { | |||
@@ -562,11 +566,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::ReportPr | |||
return; | |||
} | |||
} | |||
GELOGI("start ProfilingTaskDescInfo."); | |||
GELOGD("start ProfilingTaskDescInfo."); | |||
ProfilingTaskDescInfo(model_id, task_desc_info, logic_device_id); | |||
GELOGI("start ProfilingGraphDescInfo."); | |||
GELOGD("start ProfilingGraphDescInfo."); | |||
ProfilingGraphDescInfo(model_id, compute_graph_desc_info, logic_device_id); | |||
GELOGI("Report profiling data for GE end."); | |||
GELOGD("Report profiling data for GE end."); | |||
#endif | |||
} | |||
@@ -855,7 +859,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | |||
} | |||
GELOGI("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||
GELOGD("Runtime config param: 0x%llx, device num: %d.", module, device_num); | |||
rtError_t rt_ret = rtProfilerStart(module, device_num, device_id_ptr.get()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -874,7 +878,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
GELOGW("Prof start: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStart, module, device_list); | |||
GELOGI("Prof start profiling success."); | |||
GELOGD("Prof start profiling success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
@@ -897,7 +901,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
for (int32_t i = 0; i < device_num; i++) { | |||
device_id_ptr[i] = static_cast<uint32_t>(device_list[i]); | |||
} | |||
GELOGI("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||
GELOGD("Prof stop: runtime config param: 0x%llx, device num: %d", module, device_num); | |||
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get()); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); | |||
@@ -917,7 +921,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt | |||
GELOGW("Prof stop: load model module is invalid."); | |||
} | |||
UpdateDeviceIdModuleMap(kProfStop, module, device_list); | |||
GELOGI("Prof stop profiling success."); | |||
GELOGD("Prof stop profiling success."); | |||
#endif | |||
return SUCCESS; | |||
} | |||
@@ -959,14 +963,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(rt_ret, "Runtime get logic_device_id failed, current logic_device_id:%d", logic_device_id); | |||
} | |||
GELOGI("Current logic_device_id:%d", logic_device_id); | |||
GELOGD("Current logic_device_id:%d", logic_device_id); | |||
bool execute_model_prof_on = false; | |||
auto iter = std::find(device_id_.begin(), device_id_.end(), logic_device_id); | |||
if (iter != device_id_.end()) { | |||
execute_model_prof_on = true; | |||
} | |||
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||
GELOGD("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on); | |||
return is_execute_profiling_ || execute_model_prof_on; | |||
} | |||
@@ -25,13 +25,14 @@ | |||
#include "common/dump/dump_properties.h" | |||
#include "graph/op_desc.h" | |||
#include "common/ge_compiler_options.h" | |||
namespace ge { | |||
// Configuration property management | |||
static const char *SYSMODE __attribute__((unused)) = "FMK_SYSMODE"; | |||
static const char *USE_FUSION __attribute__((unused)) = "FMK_USE_FUSION"; | |||
static const char *TIMESTAT_ENABLE __attribute__((unused)) = "DAVINCI_TIMESTAT_ENABLE"; | |||
static const char *ANNDROID_DEBUG __attribute__((unused)) = "ANNDROID_DEBUG"; | |||
static const char *SYSMODE GE_ATTRIBUTE_UNUSED = "FMK_SYSMODE"; | |||
static const char *USE_FUSION GE_ATTRIBUTE_UNUSED = "FMK_USE_FUSION"; | |||
static const char *TIMESTAT_ENABLE GE_ATTRIBUTE_UNUSED = "DAVINCI_TIMESTAT_ENABLE"; | |||
static const char *ANNDROID_DEBUG GE_ATTRIBUTE_UNUSED = "ANNDROID_DEBUG"; | |||
class PropertiesManager { | |||
public: | |||
@@ -16,11 +16,12 @@ | |||
#include "framework/common/util.h" | |||
#include <fcntl.h> | |||
#include <sys/stat.h> | |||
#ifdef __GNUC__ | |||
#include <regex.h> | |||
#include <unistd.h> | |||
#else | |||
#include <regex> | |||
#endif | |||
#include <algorithm> | |||
#include <climits> | |||
#include <cstdlib> | |||
@@ -208,29 +209,30 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadBytesFromBinaryFile(co | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY int CreateDirectory(const std::string &directory_path) { | |||
GE_CHK_BOOL_EXEC(!directory_path.empty(), return -1, "directory path is empty."); | |||
auto dir_path_len = directory_path.length(); | |||
if (dir_path_len >= PATH_MAX) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, | |||
{directory_path, std::to_string(PATH_MAX)}); | |||
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), PATH_MAX); | |||
if (dir_path_len >= MMPA_MAX_PATH) { | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19002", {"filepath", "size"}, {directory_path, std::to_string(MMPA_MAX_PATH)}); | |||
GELOGW("Path[%s] len is too long, it must be less than %d", directory_path.c_str(), MMPA_MAX_PATH); | |||
return -1; | |||
} | |||
char tmp_dir_path[PATH_MAX] = {0}; | |||
char tmp_dir_path[MMPA_MAX_PATH] = {0}; | |||
for (size_t i = 0; i < dir_path_len; i++) { | |||
tmp_dir_path[i] = directory_path[i]; | |||
if ((tmp_dir_path[i] == '\\') || (tmp_dir_path[i] == '/')) { | |||
if (access(tmp_dir_path, F_OK) != 0) { | |||
int32_t ret = mmMkdir(tmp_dir_path, S_IRUSR | S_IWUSR | S_IXUSR); // 700 | |||
if (mmAccess2(tmp_dir_path, M_F_OK) != EN_OK) { | |||
int32_t ret = mmMkdir(tmp_dir_path, M_IRUSR | M_IWUSR | M_IXUSR); // 700 | |||
if (ret != 0) { | |||
if (errno != EEXIST) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | |||
GELOGW("Can not create directory %s. Make sure the directory exists and writable.", directory_path.c_str()); | |||
GELOGW("Can not create directory %s. Make sure the directory exists and writable.", | |||
directory_path.c_str()); | |||
return ret; | |||
} | |||
} | |||
} | |||
} | |||
} | |||
int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR); // 700 | |||
int32_t ret = mmMkdir(const_cast<char *>(directory_path.c_str()), M_IRUSR | M_IWUSR | M_IXUSR); // 700 | |||
if (ret != 0) { | |||
if (errno != EEXIST) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19006", {"path"}, {directory_path}); | |||
@@ -305,9 +307,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ReadProtoFromMem(const cha | |||
} | |||
/// Returns the current wall-clock time expressed in microseconds.
///
/// The time is read through mmGetTimeOfDay. A failing call is only logged;
/// the result is then computed from the value-initialized `tv`.
///
/// @return seconds * 1000000 + microseconds, as an unsigned 64-bit value.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY uint64_t GetCurrentTimestamp() {
  mmTimeval tv{};
  int ret = mmGetTimeOfDay(&tv, nullptr);
  GE_LOGE_IF(ret != EN_OK, "Func gettimeofday may failed: ret=%d", ret);
  // Widen before multiplying: if tv_sec is a 32-bit signed type on some
  // platform, multiplying it by 1000000 in its own type would overflow.
  const uint64_t kMicrosecondsPerSecond = 1000000;  // seconds to microseconds
  return static_cast<uint64_t>(tv.tv_sec) * kMicrosecondsPerSecond + static_cast<uint64_t>(tv.tv_usec);
}
@@ -347,16 +349,15 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInt64MulOverflow(int6 | |||
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::string RealPath(const char *path) { | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(path == nullptr, return "", "path pointer is NULL."); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
strlen(path) >= PATH_MAX, | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(PATH_MAX)}); | |||
return "", "Path[%s] len is too long, it must be less than %d", path, PATH_MAX); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(path) >= MMPA_MAX_PATH, | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19002", {"filepath", "size"}, {path, std::to_string(MMPA_MAX_PATH)}); | |||
return "", "Path[%s] len is too long, it must be less than %d", path, MMPA_MAX_PATH); | |||
// Nullptr is returned when the path does not exist or there is no permission | |||
// Return absolute path when path is accessible | |||
std::string res; | |||
char resolved_path[PATH_MAX] = {0}; | |||
if (realpath(path, resolved_path) != nullptr) { | |||
char resolved_path[MMPA_MAX_PATH] = {0}; | |||
if (mmRealPath(path, resolved_path, MMPA_MAX_PATH) == EN_OK) { | |||
res = resolved_path; | |||
} | |||
@@ -383,7 +384,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||
// A regular expression used to verify the validity of the input file path | |||
// Path section: supports upper- and lower-case letters, digits, dots (.), Chinese characters and underscores | |||
// File name section: supports upper- and lower-case letters, digits, underscores, Chinese characters and dots (.) | |||
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
#ifdef __GNUC__ | |||
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
#else | |||
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; | |||
#endif | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
!ValidateStr(real_path, mode), | |||
@@ -392,7 +397,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckInputPathValid(const | |||
return false, "Invalid value for %s[%s], %s.", atc_param.c_str(), real_path.c_str(), kPathValidReason); | |||
// The absolute path points to a file that is not readable | |||
if (access(real_path.c_str(), R_OK) != 0) { | |||
if (mmAccess2(real_path.c_str(), M_R_OK) != EN_OK) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19003", {"file", "errmsg"}, {file_path.c_str(), strerror(errno)}); | |||
GELOGW("Read file[%s] failed, errmsg[%s]", file_path.c_str(), strerror(errno)); | |||
return false; | |||
@@ -410,15 +415,19 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||
return false; | |||
} | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
strlen(file_path.c_str()) >= PATH_MAX, ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19002", {"filepath", "size"}, {file_path, std::to_string(PATH_MAX)}); | |||
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), PATH_MAX); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(strlen(file_path.c_str()) >= MMPA_MAX_PATH, | |||
ErrorManager::GetInstance().ATCReportErrMessage( | |||
"E19002", {"filepath", "size"}, {file_path, std::to_string(MMPA_MAX_PATH)}); | |||
return "", "Path[%s] len is too long, it must be less than %d", file_path.c_str(), MMPA_MAX_PATH); | |||
// A regular expression used to verify the validity of the output file path | |||
// Path section: supports upper- and lower-case letters, digits, dots (.), Chinese characters and underscores | |||
// File name section: supports upper- and lower-case letters, digits, underscores, Chinese characters and dots (.) | |||
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
#ifdef __GNUC__ | |||
std::string mode = "^[\u4e00-\u9fa5A-Za-z0-9./_-]+$"; | |||
#else | |||
std::string mode = "^[a-zA-Z]:([\\\\/][^\\s\\\\/:*?<>\"|][^\\\\/:*?<>\"|]*)*([/\\\\][^\\s\\\\/:*?<>\"|])?$"; | |||
#endif | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
!ValidateStr(file_path, mode), | |||
@@ -430,7 +439,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||
// Can get absolute path (file exists) | |||
if (!real_path.empty()) { | |||
// File is not readable or writable | |||
if (access(real_path.c_str(), W_OK | F_OK) != 0) { | |||
if (mmAccess2(real_path.c_str(), M_W_OK | M_F_OK) != EN_OK) { | |||
ErrorManager::GetInstance().ATCReportErrMessage("E19004", {"file", "errmsg"}, {real_path, strerror(errno)}); | |||
GELOGW("Write file[%s] failed, errmsg[%s]", real_path.c_str(), strerror(errno)); | |||
return false; | |||
@@ -461,6 +470,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool CheckOutputPathValid(const | |||
} | |||
FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::string &mode) { | |||
#ifdef __GNUC__ | |||
char ebuff[kMaxBuffSize]; | |||
regex_t reg; | |||
int cflags = REG_EXTENDED | REG_NOSUB; | |||
@@ -482,6 +492,23 @@ FMK_FUNC_HOST_VISIBILITY bool ValidateStr(const std::string &str, const std::str | |||
regfree(®); | |||
return true; | |||
#else | |||
std::wstring wstr(str.begin(), str.end()); | |||
std::wstring wmode(mode.begin(), mode.end()); | |||
std::wsmatch match; | |||
bool res = false; | |||
try { | |||
std::wregex reg(wmode, std::regex::icase); | |||
// Matching string part | |||
res = regex_match(wstr, match, reg); | |||
res = regex_search(str, std::regex("[`!@#$%^&*()|{}';',<>?]")); | |||
} catch (std::exception &ex) { | |||
GELOGW("The directory %s is invalid, error: %s.", str.c_str(), ex.what()); | |||
return false; | |||
} | |||
return !(res) && (str.size() == match.str().size()); | |||
#endif | |||
} | |||
FMK_FUNC_HOST_VISIBILITY bool IsValidFile(const char *file_path) { | |||
@@ -433,7 +433,7 @@ Status DNNEngineManager::ReadJsonFile(const std::string &file_path, JsonHandle h | |||
return FAILED; | |||
} | |||
const char *file = file_path.data(); | |||
if ((access(file, F_OK)) == -1) { | |||
if ((mmAccess2(file, M_F_OK)) != EN_OK) { | |||
if (engines_map_.size() != 0) { | |||
GELOGE(FAILED, "The json file %s is not exist, %s", file_path.c_str(), strerror(errno)); | |||
return FAILED; | |||
@@ -588,7 +588,7 @@ Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||
} | |||
Status GeExecutor::UnloadModel(uint32_t model_id) { | |||
GELOGI("unload model %u begin.", model_id); | |||
GELOGD("unload model %u begin.", model_id); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -630,7 +630,6 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData | |||
// Get input and output descriptor | |||
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | |||
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | |||
GELOGI("get model desc info begin."); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -665,7 +664,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
GetGeTensorDescFromDomiInfo(input_desc, input_desc_infos, input_formats); | |||
GetGeTensorDescFromDomiInfo(output_desc, output_desc_infos, output_formats); | |||
GELOGI("get model desc info end."); | |||
return ge::SUCCESS; | |||
} | |||
@@ -679,7 +677,6 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes | |||
/// | |||
Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info, | |||
int32_t &dynamic_type) { | |||
GELOGI("Begin to get dynamic batch info."); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -690,8 +687,6 @@ Status GeExecutor::GetDynamicBatchInfo(uint32_t model_id, std::vector<std::vecto | |||
GELOGE(ret, "GetDynamicBatchInfo failed."); | |||
return ret; | |||
} | |||
GELOGI("Get dynamic batch info succ."); | |||
return SUCCESS; | |||
} | |||
@@ -727,7 +722,6 @@ Status GeExecutor::GetCombinedDynamicDims(uint32_t model_id, vector<vector<int64 | |||
/// @return execute result | |||
/// | |||
Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> &user_designate_shape_order) { | |||
GELOGI("Begin to get user designate shape info."); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -739,7 +733,6 @@ Status GeExecutor::GetUserDesignateShapeOrder(uint32_t model_id, vector<string> | |||
return ret; | |||
} | |||
GELOGI("Get user designate shape order succ."); | |||
return SUCCESS; | |||
} | |||
@@ -782,7 +775,6 @@ Status GeExecutor::GetAippType(uint32_t model_id, uint32_t index, InputAippType | |||
} | |||
Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dynamic_output_shape_info) { | |||
GELOGI("Begin to get dynamic batch output shape info"); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -792,8 +784,6 @@ Status GeExecutor::GetModelAttr(uint32_t model_id, std::vector<std::string> &dyn | |||
GELOGE(ret, "Get dynamic batch output shape info failed."); | |||
return ret; | |||
} | |||
GELOGI("Get dynamic batch output shape info succ."); | |||
return SUCCESS; | |||
} | |||
@@ -835,8 +825,6 @@ Status GeExecutor::GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge | |||
} | |||
Status GeExecutor::CommandHandle(const Command &command) { | |||
GELOGI("command handle begin."); | |||
Status ret = GraphLoader::CommandHandle(command); | |||
if (ret != SUCCESS) { | |||
GELOGE(ACL_ERROR_GE_COMMAND_HANDLE, "CommandHandle: Command Handle failed."); | |||
@@ -904,7 +892,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da | |||
*/ | |||
Status GeExecutor::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, size_t mem_size, | |||
void *weight_ptr, size_t weight_size) { | |||
GELOGI("Load model from data begin."); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "not inited yet!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -945,7 +932,6 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat | |||
*/ | |||
Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data, | |||
ge::RunModelData &run_output_data, bool async_mode) { | |||
GELOGI("Execute model begin."); | |||
if (!isInit_) { | |||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||
@@ -387,12 +387,12 @@ LOCAL_SRC_FILES += $(BUILER_SRC_FILES) | |||
LOCAL_SRC_FILES += $(ANALYZER_SRC_FILES) | |||
LOCAL_STATIC_LIBRARIES := libge_memory \ | |||
libmmpa \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
libc_sec \ | |||
libascend_protobuf \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libge_common \ | |||
@@ -451,12 +451,12 @@ LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) | |||
LOCAL_C_INCLUDES += $(ANALYZER_LOCAL_INCLUDES) | |||
LOCAL_STATIC_LIBRARIES := libge_memory \ | |||
libmmpa \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
libc_sec \ | |||
libascend_protobuf \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libresource \ | |||
@@ -221,7 +221,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, | |||
Operator op = ge::OpDescUtils::CreateOperatorFromOpDesc(op_desc); | |||
auto ret = op_kernel.Compute(op, named_inputs, named_outputs); | |||
if (ret != GRAPH_SUCCESS) { | |||
GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret); | |||
GELOGW("Failed to compute host cpu op. node = %s", op_desc->GetName().c_str()); | |||
return FAILED; | |||
} | |||
op.BreakConnect(); | |||
@@ -88,6 +88,25 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||
include ${BUILD_HOST_SHARED_LIBRARY} | |||
#compiler for device libge_local_opskernel_builder.so | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := libge_local_opskernel_builder | |||
LOCAL_CFLAGS += -Werror | |||
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||
LOCAL_LDFLAGS := | |||
LOCAL_STATIC_LIBRARIES := | |||
LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||
libc_sec \ | |||
libslog \ | |||
libregister \ | |||
libgraph | |||
LOCAL_SRC_FILES := $(ops_kernel_builder_src_files) | |||
LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||
include ${BUILD_SHARED_LIBRARY} | |||
#compiler for libge_local_opskernel_builder.so in atc | |||
include $(CLEAR_VARS) | |||
@@ -111,7 +111,7 @@ Status GeLocalOpsKernelBuilder::CalcOpRunningParam(Node &ge_node) { | |||
TypeUtils::DataTypeToSerialString(data_type).c_str(), output_mem_size); | |||
return FAILED; | |||
} | |||
GELOGI( | |||
GELOGD( | |||
"Calc op[%s:%s] out[%zu] mem size is %ld," | |||
" format=%s, data_type=%s.", | |||
node_name.c_str(), node_type.c_str(), i, output_mem_size, TypeUtils::FormatToSerialString(format).c_str(), | |||
@@ -174,7 +174,7 @@ Status GeLocalOpsKernelBuilder::GenerateTask(const Node &node, RunContext &conte | |||
GELOGE(ret, "Node:%s(%s) op run failed.", name.c_str(), type.c_str()); | |||
return ret; | |||
} | |||
GELOGI("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); | |||
GELOGD("Ge local generate task for node:%s(%s) end, tasks.size()=%zu.", name.c_str(), type.c_str(), tasks.size()); | |||
return ret; | |||
} | |||
} // namespace ge_local | |||
@@ -24,7 +24,7 @@ namespace ge_local { | |||
NoOp::NoOp(const Node &node, RunContext &run_context) : Op(node, run_context) {} | |||
Status NoOp::Run() { | |||
GELOGI("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); | |||
GELOGD("Node:%s type is %s, no need generate task.", name_.c_str(), type_.c_str()); | |||
// Do nothing | |||
return SUCCESS; | |||
} | |||
@@ -372,12 +372,12 @@ LOCAL_SRC_FILES += $(LIBCLIENT_LOCAL_SRC_FILES) | |||
LOCAL_STATIC_LIBRARIES := libge_memory \ | |||
libadump_server \ | |||
libmsprofiler \ | |||
libmmpa \ | |||
LOCAL_SHARED_LIBRARIES := \ | |||
libc_sec \ | |||
libascend_protobuf \ | |||
libslog \ | |||
libmmpa \ | |||
libgraph \ | |||
libregister \ | |||
libge_common \ | |||
@@ -15,83 +15,56 @@ | |||
*/ | |||
#include "ge_runtime/task/hccl_task.h" | |||
#include <algorithm> | |||
#include "ge_runtime/task/task_factory.h" | |||
#include "common/opskernel/ops_kernel_info_store.h" | |||
#include "common/opskernel/ge_task_info.h" | |||
namespace ge { | |||
namespace model_runner { | |||
std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<HcclTask::StreamGuard>>>> | |||
HcclTask::model_stream_mapping_; | |||
std::mutex HcclTask::model_stream_mapping_mutex_; | |||
HcclTask::HcclTask(const ModelContext &model_context, const std::shared_ptr<HcclTaskInfo> &task_info) | |||
: TaskRepeater<HcclTaskInfo>(model_context, task_info), | |||
task_info_(task_info), | |||
stream_(nullptr), | |||
workspace_mem_(nullptr), | |||
rt_model_handle_(nullptr), | |||
priority_(0), | |||
slave_stream_list_(), | |||
hcom_bind_model_(nullptr), | |||
hcom_unbind_model_(nullptr), | |||
hcom_distribute_task_(nullptr) { | |||
secondary_stream_list_() { | |||
if (task_info_ == nullptr) { | |||
GELOGW("task_info_ is null!"); | |||
} | |||
hcom_bind_model_ = task_info->hcom_bind_model(); | |||
hcom_unbind_model_ = task_info->hcom_unbind_model(); | |||
priority_ = model_context.priority(); | |||
rt_model_handle_ = model_context.rt_model_handle(); | |||
auto stream_list = model_context.stream_list(); | |||
if (hcom_bind_model_ != nullptr) { | |||
if (rt_model_handle_list_.insert(rt_model_handle_).second) { | |||
for (auto stream : stream_list) { | |||
(void)hcom_bind_model_(rt_model_handle_, stream); | |||
} | |||
} | |||
} | |||
if (stream_list.size() == 1) { | |||
stream_ = stream_list[0]; | |||
} else if (stream_list.size() > task_info->stream_id()) { | |||
stream_ = stream_list[task_info->stream_id()]; | |||
} else { | |||
GELOGW("index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size()); | |||
GELOGW("Index: %u >= stream_list.size(): %zu.", task_info->stream_id(), stream_list.size()); | |||
} | |||
} | |||
HcclTask::~HcclTask() { | |||
for (size_t i = 0; i < slave_stream_list_.size(); ++i) { | |||
rtError_t rt_ret = rtModelUnbindStream(rt_model_handle_, slave_stream_list_[i]); | |||
if (workspace_mem_ != nullptr) { | |||
rtError_t rt_ret = rtFree(workspace_mem_); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Unbind stream from model failed! Index: %zu", i); | |||
} | |||
} | |||
for (size_t i = 0; i < slave_stream_list_.size(); ++i) { | |||
rtError_t rt_ret = rtStreamDestroy(slave_stream_list_[i]); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Destroy stream failed! Index: %zu", i); | |||
} | |||
} | |||
if (hcom_unbind_model_ != nullptr) { | |||
if (rt_model_handle_list_.find(rt_model_handle_) != rt_model_handle_list_.end()) { | |||
(void)hcom_unbind_model_(rt_model_handle_); | |||
(void)rt_model_handle_list_.erase(rt_model_handle_); | |||
GELOGE(RT_FAILED, "rtFree workspace_mem_ failed! ret: 0x%X.", rt_ret); | |||
} | |||
workspace_mem_ = nullptr; | |||
} | |||
} | |||
bool HcclTask::Distribute() { | |||
// No ops kernel info store | |||
hcom_distribute_task_ = task_info_->hcom_distribute_task(); | |||
if (hcom_distribute_task_ != nullptr) { | |||
return hcom_distribute_task_(task_info_, stream_); | |||
} | |||
// Ops kernel info store | |||
// Get privateDef and opsKernelStorePtr | |||
GELOGI("get custom info in modelTaskDef"); | |||
GELOGI("Get custom info in modelTaskDef"); | |||
void *ops_kernel_store = task_info_->ops_kernel_store(); | |||
OpsKernelInfoStore *ops_kernel_info_store = reinterpret_cast<OpsKernelInfoStore *>(ops_kernel_store); | |||
if (ops_kernel_store == nullptr) { | |||
@@ -101,25 +74,15 @@ bool HcclTask::Distribute() { | |||
char *private_def = reinterpret_cast<char *>(const_cast<char unsigned *>(task_info_->private_def().data())); | |||
auto private_def_len = static_cast<uint32_t>(task_info_->private_def().size()); | |||
GELOGI("the first address of the custom info, privateDef=%p", private_def); | |||
GELOGI("hcclStreamNum =%ld", task_info_->hccl_stream_num()); | |||
for (int64_t i = 0; i < task_info_->hccl_stream_num(); ++i) { | |||
rtStream_t stream = nullptr; | |||
rtError_t rt_ret = rtStreamCreateWithFlags(&stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return false; | |||
} | |||
GELOGI("The first address of the custom info, privateDef=%p", private_def); | |||
SetSecondaryStream(); | |||
rt_ret = rtModelBindStream(rt_model_handle_, stream, RT_HEAD_STREAM); | |||
if (task_info_->workspace_size() > 0) { | |||
rtError_t rt_ret = rtMalloc(&workspace_mem_, task_info_->workspace_size(), RT_MEMORYINFO_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret); | |||
return false; | |||
} | |||
GELOGI("hccl_stream addr is=%p", stream); | |||
slave_stream_list_.push_back(stream); | |||
} | |||
GELOGI("HcclTaskInfo Distribute Start. begin to call function LoadTask in hccl."); | |||
@@ -128,17 +91,22 @@ bool HcclTask::Distribute() { | |||
ge_task.type = static_cast<uint16_t>(RT_MODEL_TASK_HCCL); | |||
ge_task.stream = stream_; | |||
ge_task.kernelHcclInfo = std::vector<GETaskKernelHcclInfo>(1); | |||
ge_task.kernelHcclInfo[0].hccl_type = task_info_->hccl_type(); | |||
ge_task.kernelHcclInfo[0].inputDataAddr = task_info_->input_data_addr(); | |||
ge_task.kernelHcclInfo[0].outputDataAddr = task_info_->output_data_addr(); | |||
ge_task.kernelHcclInfo[0].workSpaceAddr = task_info_->workspace_addr(); | |||
ge_task.kernelHcclInfo[0].workSpaceAddr = workspace_mem_; | |||
ge_task.kernelHcclInfo[0].workSpaceMemSize = task_info_->workspace_size(); | |||
ge_task.kernelHcclInfo[0].count = task_info_->count(); | |||
ge_task.kernelHcclInfo[0].dataType = static_cast<int32_t>(task_info_->data_type()); | |||
ge_task.kernelHcclInfo[0].opType = static_cast<int32_t>(task_info_->op_type()); | |||
ge_task.kernelHcclInfo[0].rootId = task_info_->root_id(); | |||
ge_task.kernelHcclInfo[0].hcclStreamList = slave_stream_list_; | |||
std::vector<rtStream_t> secondary_stream_list; | |||
std::transform(secondary_stream_list_.begin(), secondary_stream_list_.end(), | |||
std::back_inserter(secondary_stream_list), | |||
[](const std::shared_ptr<StreamGuard> &stream) -> rtStream_t { return stream->GetStream(); }); | |||
ge_task.kernelHcclInfo[0].hcclStreamList = secondary_stream_list; | |||
ge_task.privateDef = private_def; | |||
ge_task.privateDefLen = private_def_len; | |||
@@ -151,10 +119,152 @@ bool HcclTask::Distribute() { | |||
return false; | |||
} | |||
GELOGI("call function LoadTask end."); | |||
GELOGI("Call function LoadTask end."); | |||
return true; | |||
} | |||
bool HcclTask::SetSecondaryStream() { | |||
const uint32_t master_stream_id = task_info_->stream_id(); | |||
const int64_t hccl_secondary_stream_num = task_info_->hccl_stream_num(); | |||
Status ret; | |||
std::lock_guard<std::mutex> lock(model_stream_mapping_mutex_); | |||
if (model_stream_mapping_.find(rt_model_handle_) == model_stream_mapping_.end()) { | |||
GELOGI("Need to create map for rt_model_handle_:%p with new mainstream %ld.", rt_model_handle_, master_stream_id); | |||
ret = CreateStream(hccl_secondary_stream_num, master_stream_id); | |||
if (!ret) { | |||
GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
return false; | |||
} | |||
return true; | |||
} | |||
std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>> &master_secondary_stream_map = | |||
model_stream_mapping_.at(rt_model_handle_); | |||
auto iter = master_secondary_stream_map.find(master_stream_id); | |||
if (iter != master_secondary_stream_map.end()) { | |||
std::vector<std::weak_ptr<StreamGuard>> &secondary_stream_vec = iter->second; | |||
auto lock_weak_ptr = [&secondary_stream_vec, this](int64_t index) -> bool { | |||
auto stream = secondary_stream_vec[index].lock(); | |||
if (stream == nullptr) { | |||
rtStream_t new_stream = nullptr; | |||
bool ret = CreateStream(rt_model_handle_, &new_stream); | |||
if (!ret) { | |||
GELOGE(FAILED, "CreateStream failed."); | |||
return false; | |||
} | |||
stream = std::make_shared<HcclTask::StreamGuard>(rt_model_handle_, new_stream); | |||
if (stream == nullptr) { | |||
GELOGE(FAILED, "MakeShared failed."); | |||
return false; | |||
} | |||
secondary_stream_vec[index] = stream; | |||
} | |||
secondary_stream_list_.push_back(stream); | |||
return true; | |||
}; | |||
if (static_cast<size_t>(hccl_secondary_stream_num) <= secondary_stream_vec.size()) { | |||
GELOGI("Number of secondary stream is enough to be reused."); | |||
for (int64_t i = 0; i < hccl_secondary_stream_num; ++i) { | |||
if (!lock_weak_ptr(i)) { | |||
GELOGE(FAILED, "Lock weak ptr failed."); | |||
return false; | |||
} | |||
} | |||
} else { | |||
GELOGI("Need to reuse secondary stream and create new secondary stream."); | |||
size_t created_stream_num = secondary_stream_vec.size(); | |||
for (size_t i = 0; i < secondary_stream_vec.size(); ++i) { | |||
if (!lock_weak_ptr(i)) { | |||
GELOGE(FAILED, "Lock weak ptr failed."); | |||
return false; | |||
} | |||
} | |||
ret = CreateStream(hccl_secondary_stream_num - created_stream_num, master_stream_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
return false; | |||
} | |||
} | |||
GELOGI("Initialize hccl secondary stream success, hccl_secondary_stream_num =%ld", hccl_secondary_stream_num); | |||
} else { | |||
GELOGI("Need to create secondary stream for %s with new mainstream %ld.", task_info_->op_name().c_str(), | |||
master_stream_id); | |||
ret = CreateStream(hccl_secondary_stream_num, master_stream_id); | |||
if (!ret) { | |||
GELOGE(RT_FAILED, "Create hccl stream failed."); | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
bool HcclTask::CreateStream(int64_t stream_num, int64_t master_stream_id) { | |||
GELOGI("Start to create %ld hccl secondary stream.", stream_num); | |||
for (int64_t i = 0; i < stream_num; ++i) { | |||
rtStream_t stream = nullptr; | |||
bool ret = CreateStream(rt_model_handle_, &stream); | |||
if (!ret) { | |||
GELOGE(FAILED, "CreateStream failed."); | |||
return false; | |||
} | |||
GELOGD("hccl_stream addr is=%p", stream); | |||
auto shared_stream = std::make_shared<StreamGuard>(rt_model_handle_, stream); | |||
if (shared_stream == nullptr) { | |||
GELOGE(FAILED, "MakeShared failed."); | |||
return false; | |||
} | |||
SaveHcclSecondaryStream(master_stream_id, shared_stream); | |||
secondary_stream_list_.push_back(shared_stream); | |||
} | |||
GELOGI("CreateStream success."); | |||
return true; | |||
} | |||
///
/// @brief Create one secondary stream with priority_ and bind it to the given model.
/// @param [in] model model handle the stream is bound to
/// @param [out] stream receives the created stream; reset to nullptr when binding fails
/// @return true on success, false when stream is null or a runtime call fails
///
bool HcclTask::CreateStream(rtModel_t model, rtStream_t *stream) const {
  if (stream == nullptr) {
    GELOGE(FAILED, "Output param stream is null.");
    return false;
  }

  rtError_t rt_ret = rtStreamCreateWithFlags(stream, priority_, RT_STREAM_PERSISTENT | RT_STREAM_FORCE_COPY);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    return false;
  }
  // Create secondary stream, inactive by default, activated by hccl
  rt_ret = rtModelBindStream(model, *stream, RT_MODEL_WAIT_ACTIVE_STREAM);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
    // The stream was created but never bound; nobody else holds it, so release it here
    // instead of leaking it.
    rtError_t destroy_ret = rtStreamDestroy(*stream);
    if (destroy_ret != RT_ERROR_NONE) {
      GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", destroy_ret);
    }
    *stream = nullptr;
    return false;
  }
  return true;
}
///
/// @brief Record a secondary stream in model_stream_mapping_ under rt_model_handle_ and
///        master_stream_id. Only a weak reference is stored in the mapping.
/// @param [in] master_stream_id id of the master stream the secondary stream belongs to
/// @param [in] stream secondary stream to record
///
void HcclTask::SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream) {
  // operator[] inserts an empty per-model map when this model handle has no entry yet.
  auto &streams_by_master = model_stream_mapping_[rt_model_handle_];
  streams_by_master[master_stream_id].emplace_back(stream);
}
///
/// @brief Unbind the guarded stream from its model, then destroy it.
///
HcclTask::StreamGuard::~StreamGuard() {
  rtError_t rt_ret = rtModelUnbindStream(model_, stream_);
  if (rt_ret != RT_ERROR_NONE) {
    // Keep going: returning here would skip rtStreamDestroy and leak the stream.
    GELOGE(RT_FAILED, "Unbind stream from model failed!");
  }

  rt_ret = rtStreamDestroy(stream_);
  if (rt_ret != RT_ERROR_NONE) {
    GELOGE(RT_FAILED, "Destroy stream failed!");
  }
}
REGISTER_TASK(TaskInfoType::HCCL, HcclTask, HcclTaskInfo); | |||
} // namespace model_runner | |||
} // namespace ge |
@@ -19,7 +19,9 @@ | |||
#include <memory> | |||
#include <set> | |||
#include <map> | |||
#include <vector> | |||
#include <mutex> | |||
#include "ge_runtime/task/task.h" | |||
namespace ge { | |||
@@ -33,18 +35,34 @@ class HcclTask : public TaskRepeater<HcclTaskInfo> { | |||
bool Distribute() override; | |||
private: | |||
class StreamGuard; | |||
bool SetSecondaryStream(); | |||
bool CreateStream(int64_t stream_num, int64_t master_stream_id); | |||
bool CreateStream(rtModel_t model, rtStream_t *stream) const; | |||
void SaveHcclSecondaryStream(int64_t master_stream_id, const std::shared_ptr<StreamGuard> &stream); | |||
std::shared_ptr<HcclTaskInfo> task_info_; | |||
void *stream_; | |||
void *workspace_mem_; | |||
rtModel_t rt_model_handle_; | |||
int32_t priority_; | |||
std::vector<void *> slave_stream_list_; | |||
std::function<bool(void *, void *)> hcom_bind_model_; | |||
std::function<bool(void *)> hcom_unbind_model_; | |||
std::function<bool(std::shared_ptr<HcclTaskInfo>, void *)> hcom_distribute_task_; | |||
static std::set<rtModel_t> rt_model_handle_list_; | |||
std::vector<std::shared_ptr<StreamGuard>> secondary_stream_list_; | |||
// map<key: model pointer, value: map<key: primary stream id, value: vector<secondary stream pointer>>> | |||
static std::map<rtModel_t, std::map<uint32_t, std::vector<std::weak_ptr<StreamGuard>>>> model_stream_mapping_; | |||
static std::mutex model_stream_mapping_mutex_; | |||
}; | |||
std::set<rtModel_t> HcclTask::rt_model_handle_list_{}; | |||
// Holds a stream together with the model handle it is associated with.
// The destructor is declared here and defined out of line.
class HcclTask::StreamGuard {
 public:
  StreamGuard(rtModel_t model, rtStream_t stream) : model_(model), stream_(stream) {}
  ~StreamGuard();

  // The guard holds a raw stream handle; copying it would leave two guards referring to
  // the same stream, so copies are forbidden. Instances are shared through std::shared_ptr.
  StreamGuard(const StreamGuard &) = delete;
  StreamGuard &operator=(const StreamGuard &) = delete;

  // Returns the guarded stream without transferring ownership.
  rtStream_t GetStream() const { return stream_; }

 private:
  rtModel_t model_;    // model handle passed at construction
  rtStream_t stream_;  // stream handle passed at construction
};
} // namespace model_runner | |||
} // namespace ge | |||
@@ -200,7 +200,6 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons | |||
} | |||
static void GetOpsProtoPath(string &opsproto_path) { | |||
GELOGI("Start to get ops proto path schedule."); | |||
const char *path_env = std::getenv("ASCEND_OPP_PATH"); | |||
if (path_env != nullptr) { | |||
string path = path_env; | |||
@@ -383,7 +382,6 @@ bool GeGenerator::Impl::ParseVersion(const std::string &line, std::string &versi | |||
} | |||
version = temp.substr(pos + flag.size()); | |||
GELOGI("Version=%s", version.c_str()); | |||
return true; | |||
} | |||
@@ -425,7 +423,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) { | |||
path_base = path_base.substr(0, path_base.rfind('/') + 1); | |||
std::string version_path = path_base + "version.info"; | |||
GELOGI("version_path is %s", version_path.c_str()); | |||
std::string version; | |||
if (!GetVersionFromPath(version_path, version)) { | |||
GELOGW("Get atc version information failed!"); | |||
@@ -436,7 +433,6 @@ bool GeGenerator::Impl::SetAtcVersionInfo(AttrHolder &obj) { | |||
GELOGW("Ge model set atc version failed!"); | |||
return false; | |||
} | |||
GELOGI("Ge model set atc version information success."); | |||
return true; | |||
} | |||
@@ -449,7 +445,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { | |||
} | |||
std::string version_path = path_env; | |||
version_path += "/version.info"; | |||
GELOGI("version_path is %s", version_path.c_str()); | |||
std::string version; | |||
if (!GetVersionFromPath(version_path, version)) { | |||
GELOGW("Get opp version information failed!"); | |||
@@ -460,7 +455,6 @@ bool GeGenerator::Impl::SetOppVersionInfo(AttrHolder &obj) { | |||
GELOGW("Ge model set opp version failed!"); | |||
return false; | |||
} | |||
GELOGI("Ge Model set opp version information success."); | |||
return true; | |||
} | |||
@@ -469,7 +463,7 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
rtContext_t ctx = nullptr; | |||
auto rt = rtCtxGetCurrent(&ctx); | |||
if (rt != RT_ERROR_NONE) { | |||
GELOGW("Current ctx is null."); | |||
GELOGD("Current ctx is null."); | |||
ctx = nullptr; | |||
} | |||
@@ -524,7 +518,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||
(void)rtCtxSetCurrent(ctx); | |||
} | |||
GELOGI("GenerateOfflineModel success."); | |||
return SUCCESS; | |||
} | |||
@@ -713,7 +706,6 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> | |||
return GE_GENERATOR_GRAPH_MANAGER_ADD_GRAPH_FAILED; | |||
} | |||
GELOGI("Model inputs size is %zu", inputs.size()); | |||
graph_manager_.SetOptionsRunGraphFlag(false); | |||
static std::atomic<uint64_t> atomic_session_id(0); | |||
@@ -102,7 +102,6 @@ void GraphBuilder::SetOptions(const ge::GraphManagerOptions &options) { | |||
} | |||
Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | |||
GELOGI("Begin to calculate op running param."); | |||
GE_CHECK_NOTNULL(graph); | |||
auto instance_ptr = ge::GELib::GetInstance(); | |||
if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { | |||
@@ -140,7 +139,6 @@ Status GraphBuilder::CalcOpParam(const ge::ComputeGraphPtr &graph) { | |||
auto parent_node = graph->GetParentNode(); | |||
if (parent_node == nullptr) { | |||
GELOGI("Graph[%s] do not have parent node, no need update parent node output size.", graph->GetName().c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -189,7 +187,6 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||
Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||
GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||
GELOGI("Start to build model."); | |||
if (comp_graph == nullptr) { | |||
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | |||
return GE_GRAPH_PARAM_NULLPTR; | |||
@@ -267,7 +264,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||
} | |||
GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | |||
"Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | |||
GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||
GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||
GE_TIMESTAMP_END(BuildSubgraph, "GraphBuilder::Build"); | |||
return SUCCESS; | |||
} | |||
@@ -306,7 +303,7 @@ Status GraphBuilder::BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeMo | |||
} | |||
GE_CHK_STATUS_RET(builder.SaveDataToModel(*model_ptr, *ge_model_ptr), | |||
"Graph[%s] builder SaveDataToModel() return fail.", comp_graph->GetName().c_str()); | |||
GELOGI("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||
GELOGD("Success to build graph[%s] model.", comp_graph->GetName().c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -542,7 +539,6 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||
} | |||
Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | |||
GELOGI("[SecondPartition] second partition."); | |||
GE_TIMESTAMP_START(GraphPartition2); | |||
auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | |||
if (ret != SUCCESS) { | |||
@@ -33,7 +33,7 @@ Status LabelAllocator::AssignFunctionalLabels() { | |||
} | |||
// Add label task for sub graph. | |||
GELOGI("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str()); | |||
GELOGD("AssignFunctionalLabels start: %s.", compute_graph_->GetName().c_str()); | |||
std::set<NodePtr> functional_nodes; | |||
for (auto graph : compute_graph_->GetAllSubgraphs()) { | |||
if (!CollectFunctionalNode(graph, functional_nodes)) { | |||
@@ -597,10 +597,10 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap | |||
return status; | |||
} | |||
GELOGI("Subgraphs of graph %s:", graph->GetName().c_str()); | |||
GELOGD("Subgraphs of graph %s:", graph->GetName().c_str()); | |||
for (const auto &subgraph : subgraphs) { | |||
if (subgraph != nullptr) { | |||
GELOGI("subgraph: %s", subgraph->name.c_str()); | |||
GELOGD("subgraph: %s", subgraph->name.c_str()); | |||
} | |||
} | |||
@@ -664,9 +664,9 @@ Status LogicalStreamAllocator::RunPasses(const ComputeGraphPtr &graph, const vec | |||
Status status = pass->Run(graph, subgraphs, context_); | |||
if (status == SUCCESS) { | |||
GELOGI("Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||
GELOGD("Stream pass %s return SUCCESS.", pass->GetName().c_str()); | |||
} else if (status == NOT_CHANGED) { | |||
GELOGI("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||
GELOGD("Stream pass %s return NOT_CHANGED.", pass->GetName().c_str()); | |||
} else { | |||
GELOGE(status, "Stream pass %s failed.", pass->GetName().c_str()); | |||
return status; | |||
@@ -76,7 +76,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||
auto range_number = static_cast<size_t>( | |||
ceil(log(all_memory_size.back() / static_cast<double>(all_memory_size.front())) / log(kLogBase))); | |||
range_number = (range_number == 0) ? 1 : range_number; | |||
GELOGI("Range number: %zu", range_number); | |||
GELOGD("Range number: %zu", range_number); | |||
vector<vector<int64_t>> ranges(range_number); | |||
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0."); | |||
@@ -114,7 +114,7 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||
range_ceils.push_back(range.back()); | |||
} | |||
} | |||
GELOGI("Range ceils: %s", ToString(range_ceils).c_str()); | |||
GELOGD("Range ceils: %s", ToString(range_ceils).c_str()); | |||
return SUCCESS; | |||
} | |||
@@ -455,12 +455,11 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||
GetNodeWorkSpaceSize(n, temp); | |||
all_memory_size.insert(all_memory_size.end(), temp.begin(), temp.end()); | |||
} | |||
GELOGI("The last atomic_addr_clean node id: %ld", atomic_addr_clean_id_); | |||
for (const auto &pair : symbol_size_) { | |||
all_memory_size.emplace_back(pair.second); | |||
} | |||
sort(all_memory_size.begin(), all_memory_size.end()); | |||
GELOGI("All memory size: %s", ToString(all_memory_size).c_str()); | |||
GELOGD("All memory size: %s", ToString(all_memory_size).c_str()); | |||
for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) { | |||
if (*iter == 0) { | |||
@@ -495,7 +494,7 @@ size_t GetBlockSize(size_t size, const vector<int64_t> &ranges) { | |||
bool IsDirectOutputNode(const NodePtr &node, int idx) { | |||
if ((node != nullptr) && (node->GetOpDesc() != nullptr) && (node->GetOpDesc()->GetType() == NETOUTPUT)) { | |||
GELOGI("This is netoutput node, the input node mem can not be reused"); | |||
GELOGD("This is netoutput node, the input node mem can not be reused"); | |||
return true; | |||
} | |||
return false; | |||
@@ -1102,7 +1101,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||
if (static_cast<uint32_t>(index) == output_index) { | |||
if (node->GetOwnerComputeGraph() != nullptr) { | |||
string graph_name = node->GetOwnerComputeGraph()->GetName(); | |||
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%d] streamid[%ld].", graph_name.c_str(), | |||
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||
op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | |||
} | |||
return true; | |||
@@ -1219,7 +1218,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
int64_t stream_id = op_desc->GetStreamId(); | |||
vector<int64_t> memorys_type; | |||
bool has_mem_type_attr = ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_OUTPUT_MEM_TYPE_LIST, memorys_type); | |||
GELOGI("Assign memory node[%s], output size[%d], output memory type size[%d]", op_desc->GetName().c_str(), | |||
GELOGD("Assign memory node[%s], output size[%zu], output memory type size[%zu]", op_desc->GetName().c_str(), | |||
op_desc->GetOutputsSize(), memorys_type.size()); | |||
if (has_mem_type_attr && (memorys_type.size() != op_desc->GetOutputsSize())) { | |||
GELOGE(INTERNAL_ERROR, "fusion: node[%s], output memory size err[outputsize:%zu, memorysize:%zu]", | |||
@@ -1257,7 +1256,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
// fusion: other type's size not means malloc HBM memory | |||
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | |||
if (l1_flag) { | |||
GELOGI("fusion: node[%s], output[%s], output memory type [%d]", | |||
GELOGI("fusion: node[%s], output[%s], output memory type [%ld]", | |||
op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | |||
size = 0; | |||
} | |||
@@ -1311,7 +1310,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||
/// | |||
void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
(void)ge::GetContext().GetOption(OPTION_EXEC_DISABLE_REUSED_MEMORY, ge_disable_reuse_mem_env_); | |||
GEEVENT("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | |||
GELOGD("Reuse memory %s", ge_disable_reuse_mem_env_ == "1" ? "close" : "open"); | |||
string op_no_reuse_mem_str; | |||
const char *op_no_reuse_mem = std::getenv(OP_NO_REUSE_MEM); | |||
GE_IF_BOOL_EXEC(op_no_reuse_mem != nullptr, op_no_reuse_mem_str = string(op_no_reuse_mem); | |||
@@ -1337,7 +1336,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
vector<bool> workspace_reuse_flag; | |||
GE_IF_BOOL_EXEC(!ge::AttrUtils::GetListBool(node_op_desc, kAttrNameWorkspaceReuseFlag, workspace_reuse_flag), | |||
GELOGD("OP %s get workspace_reuse_flag attr failed", node_op_desc->GetName().c_str())); | |||
GELOGI("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), | |||
GELOGD("Assign memory node[%s], size [temp:%zu, memory type size:%zu]", node_op_desc->GetName().c_str(), | |||
temp.size(), tvm_workspace_memory_type.size()); | |||
if (has_tvm_workspace_mem_type_attr && (temp.size() != tvm_workspace_memory_type.size())) { | |||
@@ -1350,7 +1349,7 @@ void BlockMemAssigner::AssignMemoryWithReuse(vector<int64_t> &ranges) { | |||
bool workspace_skip_flag = false; | |||
if (has_tvm_workspace_mem_type_attr && tvm_workspace_memory_type[i] == RT_MEMORY_L1) { | |||
GELOGI( | |||
"fusion: node[%s]workspace index[%d] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", | |||
"fusion: node[%s]workspace index[%zu] is not hbm type, add to zero_memory_list, workspace memory type [%ld]", | |||
node_op_desc->GetName().c_str(), i, tvm_workspace_memory_type[i]); | |||
workspace_skip_flag = true; | |||
} | |||
@@ -1628,7 +1627,7 @@ void BlockMemAssigner::ResizeMemoryBlocks() { | |||
memory_block->SetTailOffset(p2p_mem_offset_ - 1); | |||
} | |||
} | |||
GELOGI("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", | |||
GELOGD("mem_offset_ exclude zero_copy_memory is %zu, p2p_mem_offset_ exclude zero_copy_memory is %zu.", | |||
mem_offset_, p2p_mem_offset_); | |||
} | |||
@@ -117,7 +117,7 @@ Status GraphMemoryAssigner::AssignMemory() { | |||
return ge::FAILED; | |||
} | |||
int64_t var_size_assign = ge::VarManager::Instance(session_id)->GetVarMemSize(RT_MEMORY_HBM) - var_size_before_assign; | |||
GELOGI("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign); | |||
GELOGD("GraphMemoryAssigner::AssignMemory variable size = %ld", var_size_assign); | |||
mem_assigner_ = std::move(mem_assigner); | |||
@@ -296,7 +296,6 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||
mem_offset[RT_MEMORY_HBM] += memory_block->Size(); | |||
memory_block->SetTailOffset(mem_offset[RT_MEMORY_HBM] - 1); | |||
} | |||
GELOGI("mem_offset_ include zero_copy_memory is %zu.", mem_offset[RT_MEMORY_HBM]); | |||
// set offset for zero copy nodes | |||
priority_assigner->SetOpMemOffset(true); | |||
@@ -309,14 +308,13 @@ Status GraphMemoryAssigner::AssignZeroCopyMemory(map<int64_t, size_t> &mem_offse | |||
} | |||
iter->second.mem_offset_ = mem_offset[RT_MEMORY_HBM]; | |||
GELOGI("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp, | |||
GELOGD("max_mem_offset:%zu, mem_offset:%zu, zero_mem_copy_size:%zu.", mem_offset[RT_MEMORY_HBM], mem_offset_tmp, | |||
zero_mem_copy_size); | |||
return SUCCESS; | |||
} | |||
Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
GELOGI("Begin to reassign continuous memory"); | |||
Status ret; | |||
for (auto &node : compute_graph_->GetAllNodes()) { | |||
// Get the continuous input type of the node, default is false | |||
@@ -387,7 +385,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { | |||
} | |||
} | |||
for (auto pair : memory_offset_) { | |||
GELOGI("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, | |||
GELOGD("After reassign continuous memory, memory type = %ld, memoffset = %zu.", pair.first, | |||
pair.second.mem_offset_); | |||
} | |||
return ge::SUCCESS; | |||
@@ -456,7 +454,7 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, | |||
output_list.at(peer_out_data_anchor->GetIdx()) - continuous_mem_start + tensor_desc_size + MEM_ALIGN_SIZE; | |||
} | |||
GELOGI( | |||
"[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%zu] stream_id[%ld] size[%zu] " | |||
"[IMAS]Check Continuous input : Set %s name[%s] output[%d] offset to [%ld] stream_id[%ld] size[%u] " | |||
"real_size[%u].", | |||
node->GetOwnerComputeGraph()->GetName().c_str(), peer_op_desc->GetName().c_str(), | |||
peer_out_data_anchor->GetIdx(), output_list.at(peer_out_data_anchor->GetIdx()), peer_op_desc->GetStreamId(), | |||
@@ -834,7 +832,6 @@ Status GraphMemoryAssigner::ReAssignVirtualNodesMemory(map<string, vector<NodePt | |||
string max_batch_label; | |||
GE_CHK_STATUS_RET(GetMaxBatchLabel(mem_reuse_nodes_map, mem_reuse_model, max_batch_label), | |||
"Get max batch label failed."); | |||
GELOGI("The batch label of max batch virtual nodes is %s.", max_batch_label.c_str()); | |||
PrintMemoryOffset(); | |||
vector<size_t> nodes_mem_offset_list; | |||
for (auto &i_map : mem_reuse_nodes_map) { | |||
@@ -1507,7 +1504,7 @@ ge::Status GraphMemoryAssigner::UpdateOpInputOffset(const NodePtr &node, vector< | |||
GE_CHK_STATUS(TensorUtils::GetDataOffset(tensor_desc, input_offset)); | |||
} | |||
GELOGI("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||
GELOGD("%s node[%s] input[%d] is set from node[%s] out index[%lu] offset[%ld]", | |||
has_mem_type_attr == true ? "Fusion" : "", | |||
tmp_op_desc->GetName().c_str(), | |||
valid_input_index, | |||
@@ -62,9 +62,9 @@ Status HybridMemAssigner::Assign() { | |||
std::unique_ptr<BlockMemAssigner> priority_assigner; | |||
GELOGI("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size); | |||
GELOGD("Binary-block memory size:%zu, max-block memory size:%zu", bin_mem_size, max_mem_size); | |||
if (bin_mem_size <= max_mem_size) { | |||
GELOGI("Use binary-block memory assigner method"); | |||
GELOGD("Use binary-block memory assigner method"); | |||
priority_assigner = std::move(binary_assigner); | |||
} else { | |||
GELOGI("Use max-block memory assigner method"); | |||
@@ -189,7 +189,6 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) { | |||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
const auto &src_node = peer_out_anchor->GetOwnerNode(); | |||
if (!NodeUtils::GetConstOpType(src_node, const_type)) { | |||
GELOGI("Node %s:%zu, sorce node: %s Not Const", n->GetName().c_str(), index, src_node->GetName().c_str()); | |||
continue; | |||
} | |||
@@ -232,7 +231,6 @@ Status ModelBuilder::AdjustConstWeightSize(const ge::NodePtr &node, size_t &mem_ | |||
Status ModelBuilder::SetInputOutputDesc() { | |||
Status ret; | |||
GELOGI("Start to SetInputOutputDesc."); | |||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||
auto node_op_desc = n->GetOpDesc(); | |||
@@ -245,7 +243,6 @@ Status ModelBuilder::SetInputOutputDesc() { | |||
// final graph. | |||
if ((GetLocalOmgContext().format == domi::DOMI_TENSOR_ND) && (!node_op_desc->HasAttr("_is_single_op")) && | |||
((node_op_desc->GetType() == DATA_TYPE) || (node_op_desc->GetType() == NETOUTPUT))) { | |||
GELOGI("The node [%s] format should be set ND.", node_op_desc->GetName().c_str()); | |||
auto inputDescsPtr = node_op_desc->GetAllInputsDescPtr(); | |||
auto outputDescsPtr = node_op_desc->GetAllOutputsDescPtr(); | |||
ge::Format format = ge::FORMAT_ND; | |||
@@ -290,7 +287,7 @@ void ModelBuilder::AddNodeInputProperty() { | |||
vector<int64_t> src_index_list; | |||
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr!"); continue); | |||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
GE_IF_BOOL_EXEC(node_op_desc->HasAttr(MERGE_PRENODE_FLAG), continue); | |||
ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | |||
@@ -347,7 +344,6 @@ void ModelBuilder::AddNodeInputProperty() { | |||
} | |||
Status ModelBuilder::AdjustInputTensorFlag() { | |||
GELOGI("Start to AdjustInputTensorFlag."); | |||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||
if ((n->GetType() == DATA_TYPE) || (n->GetType() == AIPP_DATA_TYPE)) { | |||
GELOGD("Data node: %s.", n->GetName().c_str()); | |||
@@ -441,7 +437,6 @@ Status ModelBuilder::BuildModelDef(ge::Model &model) { | |||
return FAILED); | |||
const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(session_id_); | |||
bool is_op_debug = dump_properties.IsOpDebugOpen(); | |||
GELOGI("Get op debug:%d", is_op_debug); | |||
if (is_op_debug) { | |||
if (!ge::AttrUtils::SetBool(&model, ATTR_OP_DEBUG_FLAG, is_op_debug)) { | |||
GELOGE(FAILED, "SetBool of ATTR_OP_DEBUG_FLAG failed."); | |||
@@ -608,7 +603,6 @@ Status ModelBuilder::SaveDataToModel(ge::Model &model, ge::GeModel &ge_model) { | |||
} | |||
tbe_name_set.insert(tbe_kernel->GetName()); | |||
tbe_kernel_store_.AddTBEKernel(tbe_kernel); | |||
GELOGI("Add tbe kernel bin %s", tbe_kernel->GetName().c_str()); | |||
} | |||
for (const ge::NodePtr &n : compute_graph_->GetNodes(compute_graph_->GetGraphUnknownFlag())) { | |||
@@ -678,7 +672,6 @@ Status ModelBuilder::PreBuildModel() { | |||
GELOGE(FAILED, "Graph_ is not valid."); | |||
return FAILED; | |||
} | |||
GELOGI("BuildModel begin."); | |||
GE_CHK_STATUS_RET(SetInputOutputDesc(), "SetInputOutputDesc Failed!"); | |||
@@ -140,7 +140,7 @@ void RunContextUtil::DestroyRtModelResources() noexcept { | |||
Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &graph, Buffer &buffer, | |||
const uint64_t session_id) { | |||
GELOGI("Begin to Create RunContext, session_id = %lu", session_id); | |||
GELOGD("Begin to Create RunContext, session_id = %lu", session_id); | |||
// check params | |||
if (graph == nullptr) { | |||
GELOGE(PARAM_INVALID, "CreateRunContext param graph is null. session_id=%lu", session_id); | |||
@@ -152,21 +152,21 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||
GELOGE(INTERNAL_ERROR, "Get stream_num attr from model_def failed. session_id=%lu", session_id); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGI("Stream_num = %u", stream_num); | |||
GELOGD("Stream_num = %u", stream_num); | |||
uint32_t event_num = 0; | |||
if (!AttrUtils::GetInt(&model, ATTR_MODEL_EVENT_NUM, event_num)) { | |||
GELOGE(INTERNAL_ERROR, "Get event_num attr from model failed. session_id=%lu", session_id); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGI("Event_num = %u", event_num); | |||
GELOGD("Event_num = %u", event_num); | |||
uint32_t label_num = 0; | |||
if (!AttrUtils::GetInt(&model, ATTR_MODEL_LABEL_NUM, label_num)) { | |||
GELOGE(INTERNAL_ERROR, "Get label_num attr from model failed. session_id=%lu", session_id); | |||
return INTERNAL_ERROR; | |||
} | |||
GELOGI("Label_num = %u", label_num); | |||
GELOGD("Label_num = %u", label_num); | |||
Status ret = CreateRtModelResources(stream_num, event_num, label_num); | |||
if (ret != SUCCESS) { | |||
@@ -198,11 +198,11 @@ Status RunContextUtil::CreateRunContext(Model &model, const ComputeGraphPtr &gra | |||
void RunContextUtil::PrintMemInfo() { | |||
for (auto iter : mem_type_to_data_mem_base_) { | |||
GELOGI("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); | |||
GELOGD("CreateRunContext: memory type = %ld, data memory base = %p", iter.first, iter.second); | |||
} | |||
for (auto iter : mem_type_to_data_mem_size_) { | |||
GELOGI("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); | |||
GELOGD("CreateRunContext: memory type = %ld, data memory size = %lu", iter.first, iter.second); | |||
} | |||
} | |||
@@ -67,11 +67,10 @@ StreamAllocator::StreamAllocator(ComputeGraphPtr whole_graph, const Graph2SubGra | |||
} | |||
enable_single_stream_ = (single_stream_str == kTrueStr) ? true : false; | |||
GELOGI("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr); | |||
GELOGD("Enable single stream: %s.", enable_single_stream_ ? kTrueStr : kFalseStr); | |||
} | |||
Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &max_parallel_num, bool hcom_parallel) { | |||
GELOGI("Assign logical streams start."); | |||
GE_CHECK_NOTNULL(whole_graph_); | |||
GE_DUMP(whole_graph_, "BeforeAssignedLogicalStreams"); | |||
@@ -92,15 +91,12 @@ Status StreamAllocator::AssignLogicalStreams(const std::map<std::string, int> &m | |||
return status; | |||
} | |||
GE_DUMP(whole_graph_, "AfterAssignedLogicalStreams"); | |||
GELOGI("Assign logical streams success."); | |||
return SUCCESS; | |||
} | |||
// After allocating the logical stream in the graph, refresh the stream in the | |||
// graph and insert the synchronization node. | |||
Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_num) { | |||
GELOGI("RefreshRealStream start."); | |||
GE_CHECK_NOTNULL(whole_graph_); | |||
GE_DUMP(whole_graph_, "BeforeRefreshRealStream"); | |||
@@ -174,8 +170,7 @@ Status StreamAllocator::RefreshRealStream(int64_t &stream_num, int64_t &event_nu | |||
GELOGI("None of nodes need to assign stream, stream num is 0, it will cause error, so change it to 1"); | |||
stream_num_ = 1; | |||
} | |||
GELOGI("stream num: %ld, event num: %u.", stream_num_, event_num_); | |||
GELOGI("RefreshRealStream successfully."); | |||
GELOGD("stream num: %ld, event num: %u.", stream_num_, event_num_); | |||
stream_num = stream_num_; | |||
event_num = static_cast<int64_t>(event_num_); | |||
@@ -1241,7 +1236,7 @@ void StreamAllocator::DumpEvents() { | |||
for (const auto &one_pair : after_refresh_stream_nodes) { | |||
int64_t stream_id = one_pair.first; | |||
GELOGI("After RefreshRealStream: stream %ld.", stream_id); | |||
GELOGD("After RefreshRealStream: stream %ld.", stream_id); | |||
for (const auto &node : one_pair.second) { | |||
string send_event_str; | |||
@@ -1273,7 +1268,7 @@ Status StreamAllocator::GetMaxStreamAndTask(bool huge_stream, uint32_t &max_stre | |||
GELOGE(FAILED, "Get max stream and task count by rts failed."); | |||
return FAILED; | |||
} | |||
GELOGI("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count); | |||
GELOGD("Allowed max stream count: %u, max task count per stream: %u.", max_stream_count, max_task_count); | |||
return SUCCESS; | |||
} | |||
@@ -30,7 +30,7 @@ StreamGraphOptimizer::~StreamGraphOptimizer() {} | |||
void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map) { | |||
size_t node_size = comp_graph->GetAllNodesSize(); | |||
GELOGI("Refresh placeholder and end nodeId start from node num: %zu", node_size); | |||
GELOGD("Refresh placeholder and end nodeId start from node num: %zu", node_size); | |||
for (const auto &subgraph_pair : subgraph_map) { | |||
for (const auto &subgraph_info : subgraph_pair.second) { | |||
ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | |||
@@ -74,8 +74,6 @@ bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) { | |||
Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &comp_graph, | |||
Graph2SubGraphInfoList &subgraph_map, | |||
struct RunContext &run_context) { | |||
GELOGI("Optimize streamed subgraph start."); | |||
RefreshNodeId(comp_graph, subgraph_map); | |||
std::shared_ptr<GELib> instance = ge::GELib::GetInstance(); | |||
@@ -86,7 +84,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||
ComputeGraphPtr subgraph = subgraph_info->GetSubGraph(); | |||
GE_CHECK_NOTNULL(subgraph); | |||
GELOGI("Optimize subgraph %s", subgraph->GetName().c_str()); | |||
GELOGD("Optimize subgraph %s", subgraph->GetName().c_str()); | |||
std::string engine_name = subgraph_info->GetEngineName(); | |||
@@ -128,7 +126,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||
subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size(), ret); | |||
return ret; | |||
} | |||
GELOGI( | |||
GELOGD( | |||
"[optimizeStreamedSubGraph]: optimize streamed subgraph success, subgraph: %s, engine_name: %s, graph " | |||
"Optimizer num: %zu!", | |||
subgraph->GetName().c_str(), engine_name.c_str(), graph_optimizers.size()); | |||
@@ -137,7 +135,7 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||
} | |||
} | |||
GELOGI("Optimize streamed subgraph success."); | |||
GELOGD("Optimize streamed subgraph success."); | |||
return SUCCESS; | |||
} | |||
} // namespace ge |
@@ -68,7 +68,7 @@ TaskGenerator::TaskGenerator(uint8_t *var_mem_base, uint64_t var_mem_size) { | |||
TaskGenerator::~TaskGenerator() {} | |||
Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t session_id, RunContext &run_context) { | |||
GELOGI("Begin to Get TaskInfo. session_id=%lu", session_id); | |||
GELOGD("Begin to Get TaskInfo. session_id=%lu", session_id); | |||
// Check params | |||
if (graph == nullptr) { | |||
GELOGE(PARAM_INVALID, "GetTaskInfo param graph is null. session_id=%lu", session_id); | |||
@@ -120,7 +120,7 @@ Status TaskGenerator::GetTaskInfo(Model &model, ComputeGraphPtr &graph, uint64_t | |||
return ret; | |||
} | |||
GELOGI("Get TaskInfo success. session_id=%lu", session_id); | |||
GELOGD("Get TaskInfo success. session_id=%lu", session_id); | |||
return SUCCESS; | |||
} | |||
@@ -232,7 +232,7 @@ Status TaskGenerator::SaveFusionNodes(map<int64_t, std::vector<NodePtr>> &fusion | |||
} | |||
} | |||
} | |||
GELOGI("Fusion: get fusion group numbers [%zu].", fusion_nodes.size()); | |||
GELOGD("Fusion: get fusion group numbers [%zu].", fusion_nodes.size()); | |||
return SUCCESS; | |||
} | |||
@@ -575,7 +575,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_ | |||
continuous_op_lists.back().emplace_back(op_desc); | |||
} | |||
} | |||
GELOGI("Number of continuous node lists is %zu.", continuous_op_lists.size()); | |||
GELOGD("Number of continuous node lists is %zu.", continuous_op_lists.size()); | |||
for (const auto &continuous_ops : continuous_op_lists) { | |||
map<string, std::pair<OpDescPtr, OpDescPtr>> first_and_last_ops; | |||
@@ -846,13 +846,12 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint | |||
Status TaskGenerator::FindProfilingTaskIndex(const ComputeGraphPtr &graph, ProfilingPoint &profiling_point, | |||
vector<uint32_t> &all_reduce_nodes) const { | |||
GELOGI("Start FindProfilingTaskIndex."); | |||
GE_CHECK_NOTNULL(graph); | |||
const char *profiling_mode = std::getenv(kProfilingMode); | |||
bool is_profiling = (profiling_mode != nullptr) || ProfilingManager::Instance().ProfilingOn() || | |||
ProfilingManager::Instance().ProfilingTrainingTraceOn(); | |||
if (!is_profiling) { | |||
GELOGW("Profiling is not open."); | |||
GELOGD("Profiling is not open."); | |||
return SUCCESS; | |||
} | |||
@@ -115,7 +115,7 @@ Status GraphExecutor::FreeInOutBuffer() { | |||
malloc_flag_ = false; | |||
return SUCCESS; | |||
} else { | |||
GELOGI("[GraphManager] not malloc buffer."); | |||
GELOGD("[GraphManager] not malloc buffer."); | |||
return SUCCESS; | |||
} | |||
} | |||
@@ -286,7 +286,7 @@ Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asyn | |||
return ret; | |||
} | |||
GELOGI("Execute model success, model_id:%u.", model_id); | |||
GELOGD("Execute model success, model_id:%u.", model_id); | |||
return SUCCESS; | |||
} | |||
@@ -131,7 +131,7 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v | |||
for (const auto &virtual_args_addr : virtual_args_addrs) { | |||
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { | |||
src_addrs.push_back(mbuf_list.at(index)); | |||
dst_addrs.push_back(reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); | |||
} | |||
} | |||
index++; | |||
@@ -159,7 +159,6 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_ | |||
} | |||
void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||
GELOGI("Start to save data %s message", node->GetName().c_str()); | |||
if (node != nullptr) { | |||
auto input_op_desc = node->GetOpDesc(); | |||
if (input_op_desc == nullptr) { | |||
@@ -180,7 +179,6 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) { | |||
{op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}}); | |||
} | |||
} | |||
GELOGI("Save data message successfully"); | |||
} | |||
} | |||
@@ -218,7 +216,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr | |||
GELOGW("Get input size failed"); | |||
return; | |||
} | |||
GELOGI("Save dump op info, the input size is %ld", input_size); | |||
GELOGD("Save dump op info, the input size is %ld", input_size); | |||
op_desc_info.input_size.emplace_back(input_size); | |||
} | |||
for (size_t j = 0; j < op->GetOutputsSize(); ++j) { | |||
@@ -234,7 +232,7 @@ void DataDumper::SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr | |||
GELOGW("Get input size failed"); | |||
return; | |||
} | |||
GELOGI("Save dump op info, the output size is %ld", output_size); | |||
GELOGD("Save dump op info, the output size is %ld", output_size); | |||
op_desc_info.output_size.emplace_back(output_size); | |||
} | |||
op_desc_info.input_addrs = ModelUtils::GetInputDataAddrs(model_param, op); | |||
@@ -301,22 +299,16 @@ static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uin | |||
if (step_id != 0) { | |||
GELOGI("step_id exists."); | |||
op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id)); | |||
} else { | |||
GELOGI("step_id is null."); | |||
} | |||
if (loop_per_iter != 0) { | |||
GELOGI("loop_per_iter exists."); | |||
op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter)); | |||
} else { | |||
GELOGI("loop_per_iter is null."); | |||
} | |||
if (loop_cond != 0) { | |||
GELOGI("loop_cond exists."); | |||
op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond)); | |||
} else { | |||
GELOGI("loop_cond is null."); | |||
} | |||
} | |||
@@ -672,7 +664,7 @@ Status DataDumper::LoadDumpInfo() { | |||
PrintCheckLog(dump_list_key); | |||
if (op_list_.empty()) { | |||
GELOGW("op_list_ is empty"); | |||
GELOGD("op_list_ is empty"); | |||
} | |||
aicpu::dump::OpMappingInfo op_mapping_info; | |||
@@ -684,8 +676,6 @@ Status DataDumper::LoadDumpInfo() { | |||
op_mapping_info.set_flag(kAicpuLoadFlag); | |||
op_mapping_info.set_dump_step(dump_properties_.GetDumpStep()); | |||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | |||
GELOGI("Dump step is %s and dump path is %s dump model is %s in load dump info", | |||
dump_properties_.GetDumpStep().c_str(), dump_path.c_str(), dump_list_key.c_str()); | |||
auto ret = BuildTaskInfo(op_mapping_info); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Build task info failed"); | |||
@@ -812,7 +802,6 @@ void DataDumper::SetOpDebugIdToAicpu(uint32_t task_id, uint32_t stream_id, void | |||
Status DataDumper::UnloadDumpInfo() { | |||
if (!load_flag_) { | |||
GELOGI("No need to UnloadDumpInfo."); | |||
load_flag_ = false; | |||
return SUCCESS; | |||
} | |||
@@ -838,7 +827,6 @@ Status DataDumper::UnloadDumpInfo() { | |||
void DataDumper::PrintCheckLog(string &dump_list_key) { | |||
std::set<std::string> model_list = dump_properties_.GetAllDumpModel(); | |||
if (model_list.empty()) { | |||
GELOGI("No model need dump."); | |||
return; | |||
} | |||
@@ -17,11 +17,7 @@ | |||
#include "graph/load/new_model_manager/davinci_model.h" | |||
#include <cce/dnn.h> | |||
#include <dlfcn.h> | |||
#include <graph/utils/node_utils.h> | |||
#include <pthread.h> | |||
#include <sched.h> | |||
#include <sys/prctl.h> | |||
#include <algorithm> | |||
#include <map> | |||
#include <utility> | |||
@@ -206,7 +202,6 @@ DavinciModel::~DavinciModel() { | |||
OpDebugUnRegister(); | |||
GELOGI("do ReleaseTask"); | |||
ReleaseTask(); | |||
CleanTbeHandle(); | |||
@@ -337,7 +332,6 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p | |||
GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | |||
weights_mem_base_, weights_size); | |||
GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
GELOGI("copy weights data to device"); | |||
} | |||
GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed."); | |||
@@ -488,7 +482,7 @@ Status DavinciModel::SetTSDevice() { | |||
int64_t value = 0; | |||
bool ret = ge::AttrUtils::GetInt(ge_model_, ATTR_MODEL_CORE_TYPE, value); | |||
uint32_t core_type = ret ? static_cast<uint32_t>(value) : 0; | |||
GELOGI("SetTSDevice: %u", core_type); | |||
GELOGD("SetTSDevice: %u", core_type); | |||
rtError_t rt_ret = rtSetTSDevice(core_type); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "SetTSDevice failed, ret: 0x%X", rt_ret); | |||
@@ -543,7 +537,6 @@ Status DavinciModel::OpDebugRegister() { | |||
} | |||
void DavinciModel::OpDebugUnRegister() { | |||
GELOGI("OpDebugUnRegister, is_op_debug_reg_ = %d", is_op_debug_reg_); | |||
if (is_op_debug_reg_) { | |||
debug_reg_mutex_.unlock(); | |||
rtError_t rt_ret = RT_ERROR_NONE; | |||
@@ -648,7 +641,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed."); | |||
GE_TIMESTAMP_START(InitModelMem); | |||
GELOGI("Known node is %d", known_node_); | |||
GELOGD("Known node is %d", known_node_); | |||
if (!known_node_) { | |||
GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size)); | |||
data_inputer_ = new (std::nothrow) DataInputer(); | |||
@@ -708,7 +701,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||
} | |||
Shrink(); | |||
GELOGI("Davinci model init success."); | |||
return ret; | |||
} | |||
@@ -968,18 +960,21 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
const vector<int64_t> output_size_list = ModelUtils::GetOutputSize(op_desc); | |||
const vector<void *> virtual_addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, op_desc); | |||
const vector<int64_t> output_offset_list = op_desc->GetOutputOffset(); | |||
if (output_offset_list.size() != virtual_addr_list.size()) { | |||
GELOGE(PARAM_INVALID, "virtual_addr size:%zu should be equal to offset size:%zu.", virtual_addr_list.size(), | |||
output_offset_list.size()); | |||
if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size()) || | |||
(output_offset_list.size() != virtual_addr_list.size())) { | |||
GELOGE(PARAM_INVALID, "Data[%s] init failed: output size is %zu, virtual_addr size is %zu, offset size is %zu.", | |||
op_desc->GetName().c_str(), output_size_list.size(), virtual_addr_list.size(), output_offset_list.size()); | |||
return PARAM_INVALID; | |||
} | |||
auto data_index = data_op_index; | |||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_INDEX, data_index)) { | |||
GELOGI("ge_train: get new index %u, old %u", data_index, data_op_index); | |||
GELOGD("ge_train: get new index %u, old %u", data_index, data_op_index); | |||
} | |||
bool fusion_flag = false; | |||
ZeroCopyOffset zero_copy_offset; | |||
Status ret = zero_copy_offset.InitInputDataInfo(output_size_list, virtual_addr_list, op_desc, fusion_flag); | |||
int64_t data_size = output_size_list[kDataIndex]; | |||
void *virtual_addr = virtual_addr_list[kDataIndex]; | |||
Status ret = zero_copy_offset.InitInputDataInfo(data_size, virtual_addr, op_desc, fusion_flag); | |||
if (ret != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "InitDataInfo of input_info %s failed.", op_desc->GetName().c_str()); | |||
return PARAM_INVALID; | |||
@@ -996,7 +991,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||
new_input_outside_addrs_[addr] = zero_copy_offset; | |||
} | |||
GELOGI("SetInputOutsideAddr success."); | |||
data_op_index++; | |||
if (InitInputZeroCopyNodes(node) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); | |||
@@ -1131,7 +1125,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
DisableZeroCopy(real_addr); | |||
real_virtual_addrs_.insert(real_addr); | |||
} | |||
GELOGI("SetOutputOutsideAddr success."); | |||
} | |||
GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, | |||
@@ -1147,8 +1140,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||
GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS, | |||
GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;); | |||
} | |||
GELOGI("DavinciModel::InitNetoutput success."); | |||
return SUCCESS; | |||
} | |||
@@ -1459,7 +1450,7 @@ Status DavinciModel::InitLabelSet(const OpDescPtr &op_desc) { | |||
return INTERNAL_ERROR; | |||
} | |||
if (label_index >= LabelNum()) { | |||
GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %zu.", label_index, LabelNum()); | |||
GELOGE(INTERNAL_ERROR, "InitLabelSet: label index: %u >= label size: %u.", label_index, LabelNum()); | |||
return INTERNAL_ERROR; | |||
} | |||
if (label_id_indication_.count(label_index) > 0) { | |||
@@ -1968,7 +1959,6 @@ void DavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_i | |||
if (op->GetType() != NETOUTPUT) { | |||
continue; | |||
} | |||
GELOGI("Start to get dynamic output dims attr"); | |||
if (!AttrUtils::GetListStr(op, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_shape_info)) { | |||
GELOGD("Can not get dynamic output dims attr"); | |||
} | |||
@@ -2124,7 +2114,7 @@ void DavinciModel::CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputD | |||
} | |||
Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) { | |||
GELOGI("Output node size: %zu", output_op_list_.size()); | |||
GELOGD("Output node size: %zu", output_op_list_.size()); | |||
for (size_t i = 0; i < output_op_list_.size(); i++) { | |||
auto &op_desc = output_op_list_[i]; | |||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||
@@ -2187,7 +2177,7 @@ Status DavinciModel::CopyInputData(const InputData &input_data, bool device_data | |||
void *mem_addr = data.second.GetBasicAddr(); | |||
void *data_buf_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(data_buf.data)); | |||
uint64_t data_buf_length = data_buf.length; | |||
GELOGI("[IMAS]CopyPlainData memcpy graph_%lu type[F] input[%lu] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", | |||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] input[%u] dst[%p] src[%p] mem_size[%lu] datasize[%lu]", | |||
runtime_param_.graph_id, data.first, mem_addr, data_buf_addr, data_size, data_buf_length); | |||
GE_CHK_RT_RET(rtMemcpy(mem_addr, data_size, data_buf_addr, data_buf_length, kind)); | |||
} | |||
@@ -2235,8 +2225,6 @@ Status DavinciModel::SinkModelProfile() { | |||
Msprof::Engine::Reporter *reporter = PluginImpl::GetPluginReporter(); | |||
GE_IF_BOOL_EXEC(reporter == nullptr, GELOGI("Profiling report is nullptr!"); return SUCCESS); | |||
GELOGI("Start collect model load profiling data."); | |||
Msprof::Engine::ReporterData reporter_data{}; | |||
// report model data tag name | |||
std::string tag_name; | |||
@@ -2294,7 +2282,6 @@ Status DavinciModel::SinkModelProfile() { | |||
uint32_t op_num = fusion_op_info->original_op_names.size(); | |||
uint32_t task_id = task->GetTaskID(); | |||
if (op_num > 0) { | |||
GELOGI("task.id = %u, opNum = %u", task_id, op_num); | |||
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); | |||
} | |||
} | |||
@@ -2552,21 +2539,23 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||
if (is_dynamic_) { | |||
GELOGI("No need to check output data size."); | |||
} else if (buffer.length < mem_size) { | |||
GELOGE(FAILED, "Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||
GELOGE(FAILED, "Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); | |||
return FAILED; | |||
} else if (buffer.length > mem_size) { | |||
GELOGW("Tensor data size=%lu, buffer size=%u", mem_size, buffer.length); | |||
GELOGW("Tensor data size=%lu, buffer size=%lu", mem_size, buffer.length); | |||
} | |||
int64_t data_size = output.second.GetDataSize(); | |||
if (is_online_infer_dynamic_) { | |||
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; | |||
data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||
if (merge_nodes_gear_and_real_out_size_info_.find(idx) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[idx]; | |||
data_size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||
} | |||
} | |||
uint64_t buffer_length = buffer.length; | |||
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | |||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%ld] datasize[%u]", | |||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||
runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | |||
GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | |||
idx++; | |||
@@ -2598,11 +2587,13 @@ Status DavinciModel::GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data | |||
return ret); | |||
std::vector<int64_t> output_shape = input_desc->GetShape().GetDims(); | |||
if (is_online_infer_dynamic_) { | |||
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||
size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||
auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||
output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||
is_dynamic_ = true; | |||
if (merge_nodes_gear_and_real_out_size_info_.find(i) != merge_nodes_gear_and_real_out_size_info_.end()) { | |||
auto gear_and_real_out_size_info = merge_nodes_gear_and_real_out_size_info_[i]; | |||
size = gear_and_real_out_size_info[cur_dynamic_dims_]; | |||
auto gear_and_real_out_shape_info = merge_nodes_gear_and_real_out_shape_info_[i]; | |||
output_shape = gear_and_real_out_shape_info[cur_dynamic_dims_]; | |||
is_dynamic_ = true; | |||
} | |||
} | |||
GELOGI("Output size is %ld, output shape is %s.", size, formats::JoinToString(output_shape).c_str()); | |||
out_buffer_size_vec.push_back(size); | |||
@@ -2759,16 +2750,6 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
InputData current_data = data_wrapper->GetInput(); | |||
GELOGI("Model thread Run begin, model id:%u, data index:%u.", model_id, current_data.index); | |||
if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | |||
model->cur_dynamic_dims_.clear(); | |||
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | |||
auto shape_data_buffer_data = current_data.blobs.back().data; | |||
auto shape_data_buffer_length = current_data.blobs.back().length; | |||
model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data), | |||
reinterpret_cast<int64_t *>(shape_data_buffer_data) + | |||
shape_data_buffer_length / sizeof(int64_t)); | |||
GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | |||
} | |||
GE_TIMESTAMP_START(Model_SyncVarData); | |||
ret = model->SyncVarData(); | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG( | |||
@@ -2785,6 +2766,18 @@ void *DavinciModel::Run(DavinciModel *model) { | |||
ret != SUCCESS, (void)model->ReturnResult(current_data.index, false, false, data_wrapper->GetOutput()); | |||
CsaInteract::GetInstance().StoreInternalErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC); | |||
continue, "Copy input data to model failed."); // [No need to check value] | |||
if (model->is_online_infer_dynamic_ && !model->is_getnext_sink_dynamic_) { | |||
model->cur_dynamic_dims_.clear(); | |||
GE_IF_BOOL_EXEC(current_data.blobs.empty(), break); | |||
auto shape_data_buffer_data = current_data.blobs.back().data; | |||
auto shape_data_buffer_length = current_data.blobs.back().length; | |||
model->cur_dynamic_dims_.assign(reinterpret_cast<int64_t *>(shape_data_buffer_data), | |||
reinterpret_cast<int64_t *>(shape_data_buffer_data) + | |||
shape_data_buffer_length / sizeof(int64_t)); | |||
GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str()); | |||
delete[] (int64_t *)current_data.blobs.back().data; | |||
current_data.blobs.pop_back(); | |||
} | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_INFER_START)); | |||
if (ProfilingManager::Instance().ProfilingOpTraceOn()) { | |||
@@ -2982,7 +2975,7 @@ void DavinciModel::UnbindTaskSinkStream() { | |||
Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs) { | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap in."); | |||
if (inputs.size() > data_op_list_.size()) { | |||
GELOGE(FAILED, "input data addr %u should less than input op number %u.", inputs.size(), data_op_list_.size()); | |||
GELOGE(FAILED, "input data addr %zu should less than input op number %zu.", inputs.size(), data_op_list_.size()); | |||
return FAILED; | |||
} | |||
// remove zero copy addr in last iteration | |||
@@ -2991,16 +2984,16 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||
for (size_t i = 0; i < inputs.size(); ++i) { | |||
const vector<void *> addr_list = ModelUtils::GetOutputDataAddrs(runtime_param_, data_op_list_[i]); | |||
knonw_input_data_info_[addr_list[kDataIndex]] = inputs[i]; | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %d,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap input %zu,v addr %p,p addr %p .", i, addr_list[kDataIndex], inputs[i]); | |||
} | |||
if (output_op_list_.size() < kOutputNum) { | |||
GELOGW("output op num in graph is %u.", output_op_list_.size()); | |||
GELOGW("output op num in graph is %zu.", output_op_list_.size()); | |||
return SUCCESS; | |||
} | |||
const vector<void *> addr_list = ModelUtils::GetInputDataAddrs(runtime_param_, output_op_list_[kDataIndex]); | |||
for (size_t i = 0; i < addr_list.size() && i < outputs.size(); ++i) { | |||
knonw_output_data_info_[addr_list[i]] = outputs[i]; | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %d,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap output %zu,v addr %p,p addr %p .", i, addr_list[i], outputs[i]); | |||
} | |||
GELOGI("DavinciModel::CreateKnownZeroCopyMap success."); | |||
return SUCCESS; | |||
@@ -3010,13 +3003,13 @@ Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||
for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | |||
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | |||
if (it_in != knonw_input_data_info_.end()) { | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %d,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
knonw_input_data_info_.at(total_io_addrs_[i])); | |||
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | |||
} | |||
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | |||
if (it_out != knonw_output_data_info_.end()) { | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %d,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||
knonw_output_data_info_.at(total_io_addrs_[i])); | |||
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | |||
} | |||
@@ -3037,7 +3030,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||
if (task != nullptr) { | |||
Status ret = task->UpdateArgs(); | |||
if (ret != SUCCESS) { | |||
GELOGE(FAILED, "task %d created by davinci model is nullptr.", task_index); | |||
GELOGE(FAILED, "task %zu created by davinci model is nullptr.", task_index); | |||
return FAILED; | |||
} | |||
} | |||
@@ -3066,7 +3059,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||
} | |||
Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) { | |||
GELOGI("InitTaskInfo in, task size %zu", model_task_def.task().size()); | |||
GELOGI("InitTaskInfo in, task size %d", model_task_def.task().size()); | |||
task_list_.resize(model_task_def.task_size()); | |||
for (int i = 0; i < model_task_def.task_size(); ++i) { | |||
// dynamic shape will create task_list_ before | |||
@@ -3142,14 +3135,14 @@ Status DavinciModel::DistributeTask() { | |||
task_desc_info_.clear(); | |||
bool flag = GetL1FusionEnableOption(); | |||
char *skt_enable_env = std::getenv("SKT_ENABLE"); | |||
int64_t env_flag = (skt_enable_env != nullptr) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||
if (env_flag != 0) { | |||
flag = true; | |||
} | |||
const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | |||
GELOGI("there are %zu task need to save.", task_list_.size()); | |||
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | |||
auto &task = task_list_.at(task_index); | |||
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | |||
@@ -3331,7 +3324,7 @@ bool DavinciModel::CheckInputAndModelSize(const int64_t &input_size, const int64 | |||
if (input_size > op_size) { | |||
GELOGW( | |||
"Input size [%u] is bigger than om size need [%u], " | |||
"Input size [%ld] is bigger than om size need [%ld], " | |||
"MAY cause inference result ERROR, please check model input", | |||
input_size, op_size); | |||
} | |||
@@ -3413,7 +3406,7 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||
for (const auto &data : data_info) { | |||
if (data.first >= blobs.size()) { // check data index. | |||
GELOGE(FAILED, "Verify %s data num failed: can not find No.%zu data, because user only feeds %zu", | |||
GELOGE(FAILED, "Verify %s data num failed: can not find No.%u data, because user only feeds %zu", | |||
input_or_output.c_str(), data.first, blobs.size()); | |||
return FAILED; | |||
} | |||
@@ -3522,7 +3515,7 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { | |||
GeTensor *tensor = const_cast<GeTensor *>(v_weights[0].get()); | |||
GE_IF_BOOL_EXEC(static_cast<size_t>(v_output_size[0]) < tensor->GetData().size(), | |||
GELOGE(PARAM_INVALID, "output size:%u less than weight data size:%zu", v_output_size[0], | |||
GELOGE(PARAM_INVALID, "output size:%ld less than weight data size:%zu", v_output_size[0], | |||
tensor->GetData().size()); | |||
return PARAM_INVALID;); | |||
@@ -3546,12 +3539,12 @@ Status DavinciModel::InitConstant(const OpDescPtr &op_desc) { | |||
uint64_t offset = static_cast<uint64_t>(elem_num * kBytes); | |||
uint64_t hbm_raw_data_base_addr = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset; | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(v_output_addr[0])) + offset; | |||
for (int64_t i = elem_num - 1; i >= 0; --i) { | |||
buff[i] = hbm_raw_data_base_addr + (buff[i] - buff[0]); | |||
} | |||
} | |||
GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%u] datasize[%zu]", | |||
GELOGI("[IMAS]InitConstant memcpy graph_%u type[V] name[%s] output[%d] memaddr[%p] mem_size[%lu] datasize[%zu]", | |||
runtime_param_.graph_id, op_desc->GetName().c_str(), 0, v_output_addr[0], v_output_size[0], | |||
tensor->GetData().size()); | |||
GE_CHK_RT_RET(rtMemcpy(v_output_addr[0], v_output_size[0], tensor->GetData().data(), tensor->GetData().size(), | |||
@@ -3582,12 +3575,12 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||
if (rtQueryFunctionRegistered(bin_file_key) != RT_ERROR_NONE) { | |||
void *bin_handle = nullptr; | |||
if (!kernel_store.FindTBEHandle(bin_file_key, bin_handle)) { | |||
GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key); | |||
GELOGD("TBE: can't find the kernel_name[%s] in HandleMap", bin_file_key); | |||
rtDevBinary_t binary; | |||
std::string json_string; | |||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_MAGIC, json_string), | |||
GELOGI("Get original type of session_graph_id.")); | |||
GELOGD("Get original type of session_graph_id.")); | |||
if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") { | |||
binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU; | |||
} else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") { | |||
@@ -3603,13 +3596,13 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||
binary.data = tbe_kernel->GetBinData(); | |||
binary.length = tbe_kernel->GetBinDataSize(); | |||
GELOGI("TBE: binary.length: %lu", binary.length); | |||
GELOGD("TBE: binary.length: %lu", binary.length); | |||
GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle)); | |||
std::string meta_data; | |||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, TVM_ATTR_NAME_METADATA, meta_data), | |||
GELOGI("Get original type of json_string")); | |||
GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||
GELOGD("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str()); | |||
GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str()))); | |||
kernel_store.StoreTBEHandle(bin_file_key, bin_handle, tbe_kernel); | |||
@@ -3620,8 +3613,7 @@ Status DavinciModel::InitTbeHandle(const OpDescPtr &op_desc) { | |||
std::string kernel_name; | |||
GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc, op_desc->GetName() + "_kernelname", kernel_name), | |||
GELOGI("Get original type of kernel_name")); | |||
GELOGI("TBE: binfile_key=%s, kernel_name=%s", bin_file_key, kernel_name.c_str()); | |||
GELOGD("Get original type of kernel_name")); | |||
GE_CHK_RT_RET(rtFunctionRegister(bin_handle, bin_file_key, bin_file_key, kernel_name.c_str(), 0)); | |||
used_tbe_handle_map_[bin_file_key] = 1; // Init used num to 1. | |||
return SUCCESS; | |||
@@ -3816,7 +3808,7 @@ Status DavinciModel::InitModelStream(rtStream_t stream) { | |||
Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, | |||
OutputData &output_data) { | |||
is_async_mode_ = async_mode; | |||
GELOGI("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||
GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | |||
GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | |||
is_dynamic_ = input_data.is_dynamic_batch; | |||
if (!is_dynamic_) { | |||
@@ -3828,7 +3820,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u", | |||
model_id_); | |||
GELOGI("current_data.index=%u", input_data.index); | |||
GELOGD("current_data.index=%u", input_data.index); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END)); | |||
if (!task_list_.empty()) { | |||
@@ -3837,7 +3829,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0); | |||
GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret)); | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END)); | |||
GELOGI("rtModelExecute end"); | |||
GELOGD("rtModelExecute end"); | |||
} | |||
if (!is_async_mode_) { | |||
@@ -3849,7 +3841,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||
// report model time data | |||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data)); | |||
GELOGI("Model run end, model id:%u", model_id_); | |||
GELOGD("Model run end, model id:%u", model_id_); | |||
return SUCCESS; | |||
} | |||
@@ -3906,7 +3898,9 @@ Status DavinciModel::InitEntryTask() { | |||
uint8_t *DavinciModel::MallocFeatureMapMem(size_t data_size) { | |||
uint8_t *mem_base = nullptr; | |||
const string purpose("feature map,used for op input and output."); | |||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||
if (res == EN_OK) { | |||
data_size = static_cast<size_t>(VarManager::Instance(session_id_)->GetGraphMemoryMaxSize()); | |||
string memory_key = std::to_string(0) + "_f"; | |||
mem_base = MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, memory_key, data_size, GetDeviceId()); | |||
@@ -3936,7 +3930,9 @@ uint8_t *DavinciModel::MallocP2PMem(size_t p2p_data_size) { | |||
uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | |||
uint8_t *weights_mem_base = nullptr; | |||
const string purpose("weights memory in inference network."); | |||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||
if (res == EN_OK) { | |||
string weight_memory_key = std::to_string(0) + "_w"; | |||
weights_mem_base = | |||
MemManager::Instance(RT_MEMORY_HBM)->MallocMemory(purpose, weight_memory_key, weights_size, GetDeviceId()); | |||
@@ -3947,7 +3943,9 @@ uint8_t *DavinciModel::MallocWeightsMem(size_t weights_size) { | |||
} | |||
void DavinciModel::FreeFeatureMapMem() { | |||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr && is_inner_mem_base_) { | |||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||
if (res == EN_OK && is_inner_mem_base_) { | |||
string weight_memory_key = std::to_string(0) + "_f"; | |||
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(weight_memory_key) != nullptr) { | |||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(weight_memory_key, GetDeviceId()), | |||
@@ -3979,7 +3977,9 @@ void DavinciModel::FreeP2PMem() { | |||
} | |||
void DavinciModel::FreeWeightsMem() { | |||
if (std::getenv(kEnvGeuseStaticMemory) != nullptr) { | |||
char ge_static_mem_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(kEnvGeuseStaticMemory, ge_static_mem_env, MMPA_MAX_PATH); | |||
if (res == EN_OK) { | |||
string memory_key = std::to_string(0) + "_w"; | |||
if (MemManager::Instance(RT_MEMORY_HBM)->GetMemoryAddr(memory_key) != nullptr) { | |||
GE_CHK_STATUS(MemManager::Instance(RT_MEMORY_HBM)->FreeMemory(memory_key, GetDeviceId()), | |||
@@ -3995,7 +3995,6 @@ void DavinciModel::FreeWeightsMem() { | |||
} | |||
Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) { | |||
GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id); | |||
rtContext_t ctx = nullptr; | |||
rtError_t rt_ret = rtCtxGetCurrent(&ctx); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -4016,13 +4015,10 @@ Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) | |||
GE_CHK_STATUS_RET_NOLOG( | |||
TransVarDataUtils::TransAllVarData(variable_node_list, session_id_, ctx, graph_id, kThreadNum)); | |||
GELOGI("TransAllVarData success."); | |||
return SUCCESS; | |||
} | |||
void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||
GELOGI("set data dumper args, name: %s, id: %u.", name_.c_str(), model_id_); | |||
data_dumper_.SetModelName(name_); | |||
data_dumper_.SetModelId(model_id_); | |||
data_dumper_.SetOmName(om_name_); | |||
@@ -4048,15 +4044,13 @@ void DavinciModel::SetDataDumperArgs(const ComputeGraphPtr &compute_graph) { | |||
} | |||
return v_output_addr[0]; | |||
} | |||
GELOGW("op is null."); | |||
GELOGD("op is null."); | |||
return nullptr; | |||
}; | |||
data_dumper_.SetLoopAddr(get_var_addr(GetVariableOp(NODE_NAME_GLOBAL_STEP), runtime_param_), | |||
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_PER_ITER), runtime_param_), | |||
get_var_addr(GetVariableOp(NODE_NAME_FLOWCTRL_LOOP_COND), runtime_param_)); | |||
GELOGI("SetDataDumperArgs end."); | |||
} | |||
uint32_t DavinciModel::GetFlowctrlIndex(uint32_t op_index) { | |||
@@ -4075,7 +4069,6 @@ void DavinciModel::SaveHcclFollowStream(int64_t main_stream_id, rtStream_t strea | |||
} | |||
Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info) { | |||
GELOGI("GetComputeGraphInfo start."); | |||
auto &all_op_desc = data_dumper_.GetAllOpDescInfo(); | |||
for (auto &op_desc : all_op_desc) { | |||
ComputeGraphDescInfo compute_graph_info; | |||
@@ -4095,7 +4088,6 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||
graph_desc_info.emplace_back(compute_graph_info); | |||
} | |||
GELOGI("GetComputeGraphInfo end."); | |||
return SUCCESS; | |||
} | |||
@@ -4160,7 +4152,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||
vector<std::string> inputs; | |||
if (AttrUtils::GetListStr(data_op, ATTR_NAME_AIPP_INPUTS, inputs) && !inputs.empty()) { | |||
GELOGI("GetAllAippInputOutputDims: Data: %s has %u related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||
GELOGI("GetAllAippInputOutputDims: Data: %s has %zu related aippInfo.", data_op->GetName().c_str(), inputs.size()); | |||
for (auto it : inputs) { | |||
InputOutputDims input_info; | |||
ParseAIPPInfo(it, input_info); | |||
@@ -4171,7 +4163,7 @@ Status DavinciModel::GetAllAippInputOutputDims(uint32_t index, std::vector<Input | |||
int64_t data_input_size; | |||
(void)TensorUtils::GetSize(*(data_op->GetInputDescPtr(kDataIndex)), data_input_size); | |||
GELOGD( | |||
"GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %u, tensor_size: %zu, format: " | |||
"GetAllAippInputOutputDims related Data[%d]: tensor_name is %s, dim_num is %zu, tensor_size: %zu, format: " | |||
"%s, data_type: %s, shape: %s .", | |||
index, data_op->GetName().c_str(), data_input_desc->GetShape().GetDimNum(), data_input_size, | |||
TypeUtils::FormatToSerialString(data_input_desc->GetFormat()).c_str(), | |||
@@ -202,7 +202,6 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||
} | |||
ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
GELOGI("Destroy aicpu session for infer, model id is %u.", model_id); | |||
std::lock_guard<std::mutex> lock(map_mutex_); | |||
auto it = model_map_.find(model_id); | |||
if (it == model_map_.end()) { | |||
@@ -210,7 +209,6 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||
return GE_EXEC_MODEL_ID_INVALID; | |||
} | |||
uint64_t session_id = it->second->GetSessionId(); | |||
GELOGI("Destroy aicpu session for infer, session id is %lu.", session_id); | |||
DestroyAicpuSession(session_id); | |||
return SUCCESS; | |||
} | |||
@@ -407,10 +405,6 @@ Status ModelManager::Unload(uint32_t model_id) { | |||
} | |||
std::lock_guard<std::mutex> lock(exeception_infos_mutex_); | |||
exception_infos_.clear(); | |||
for (auto addr : shape_data_addrs_[model_id]) { | |||
delete[] addr; | |||
} | |||
shape_data_addrs_.erase(model_id); | |||
return SUCCESS; | |||
} | |||
@@ -475,6 +469,19 @@ Status ModelManager::GetCurDynamicDims(const vector<vector<int64_t>> &user_real_ | |||
} | |||
} | |||
GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); | |||
bool cur_dynamic_dims_valid = false; | |||
std::vector<std::string> shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); | |||
for (auto dynamic_dim : shape_strs) { | |||
if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) { | |||
cur_dynamic_dims_valid = true; | |||
break; | |||
} | |||
} | |||
if (!cur_dynamic_dims_valid) { | |||
GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.", | |||
formats::JoinToString(cur_dynamic_dims).c_str()); | |||
return INTERNAL_ERROR; | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -517,7 +524,6 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||
"Failed to memcpy data."); | |||
data.length = length; | |||
input_data.blobs.push_back(data); | |||
shape_data_addrs_[model_id].emplace_back(reinterpret_cast<int64_t *>(data.data)); | |||
} | |||
} | |||
@@ -1019,8 +1025,8 @@ Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippTyp | |||
Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
std::lock_guard<std::mutex> lock(session_id_create_mutex_); | |||
struct timeval tv; | |||
if (gettimeofday(&tv, nullptr) != 0) { | |||
mmTimeval tv; | |||
if (mmGetTimeOfDay(&tv, nullptr) != 0) { | |||
GELOGE(INTERNAL_ERROR, "Failed to get current time."); | |||
return INTERNAL_ERROR; | |||
} | |||
@@ -1037,8 +1043,8 @@ Status ModelManager::GenSessionId(uint64_t &session_id) { | |||
Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr<ModelListener> listener, | |||
void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || access(model.key.c_str(), F_OK) == 0, | |||
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, | |||
GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, | |||
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, | |||
"input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); | |||
GenModelId(&model_id); | |||
@@ -1123,7 +1129,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||
Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, | |||
const std::vector<uint32_t> &input_queue_ids, | |||
const std::vector<uint32_t> &output_queue_ids) { | |||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || access(model_data.key.c_str(), F_OK) == 0, | |||
GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK, | |||
ACL_ERROR_GE_EXEC_MODEL_KEY_PATH_INVALID, "input key file path %s is not valid, %s", | |||
model_data.key.c_str(), strerror(errno)); | |||
@@ -1205,7 +1211,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||
Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); | |||
if (status == SUCCESS) { | |||
GELOGI("Execute model %u success.", model_id); | |||
GELOGD("Execute model %u success.", model_id); | |||
} | |||
return status; | |||
@@ -1262,7 +1268,6 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_ | |||
} | |||
Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { | |||
GELOGI("LaunchCustAucpuSo in, kernel name %s", kernel_name.c_str()); | |||
std::lock_guard<std::mutex> lock(cust_aicpu_mutex_); | |||
if (cust_aicpu_so_.size() == 0) return SUCCESS; | |||
// get current context | |||
@@ -18,7 +18,6 @@ | |||
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_MODEL_MANAGER_H_ | |||
#include <model/ge_root_model.h> | |||
#include <pthread.h> | |||
#include <stdint.h> | |||
#include <algorithm> | |||
#include <map> | |||
@@ -364,7 +363,6 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||
std::map<uintptr_t, std::map<std::string, CustAICPUKernelPtr>> cust_aicpu_so_; | |||
static DumpProperties dump_properties_; | |||
std::map<uint32_t, std::vector<int64_t *>> shape_data_addrs_; | |||
}; | |||
} // namespace ge | |||
@@ -337,9 +337,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
continue; | |||
} | |||
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), | |||
GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); | |||
break); | |||
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), break); | |||
int64_t input_offset = v_input_offset[non_const_index]; | |||
non_const_index++; | |||
@@ -356,7 +354,7 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||
// feature maps | |||
void *mem_addr = nullptr; | |||
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | |||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(input_offset)); | |||
mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(input_offset)); | |||
v_input_data_addr.push_back(mem_addr); | |||
} else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | |||
int64_t tensor_size = 0; | |||
@@ -424,7 +422,7 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||
// feature maps | |||
void *mem_addr = nullptr; | |||
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion | |||
mem_addr = reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_output_offset[i])); | |||
mem_addr = reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_output_offset[i])); | |||
v_output_data_addr.push_back(mem_addr); | |||
} else if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_TS_4G) { | |||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | |||
@@ -500,7 +498,7 @@ vector<void *> ModelUtils::GetWorkspaceDataAddrs(const RuntimeParam &model_param | |||
continue; | |||
} | |||
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { | |||
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(reinterpret_cast<intptr_t>(v_workspace_offset[i]))); | |||
v_workspace_data_addr.push_back(reinterpret_cast<uint8_t *>(static_cast<intptr_t>(v_workspace_offset[i]))); | |||
GELOGI("[IMAS]GetWorkspaceDataAddrs graph_%u type[L1] name[%s], mem_addr[workspace index %zu]:0x%lx", | |||
model_param.graph_id, op_desc->GetName().c_str(), i, v_workspace_offset[i]); | |||
} else if (v_workspace_bytes[i] == 0) { | |||
@@ -149,7 +149,7 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin | |||
return FAILED; | |||
} | |||
uint64_t workspace_base_addr = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0])); | |||
uint64_t workspace_base_addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(workspace_data_addrs[0])); | |||
const vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | |||
const vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | |||
vector<void *> io_addrs; | |||
@@ -287,7 +287,7 @@ Status KernelExTaskInfo::CopyTaskInfo(const domi::KernelExDef &kernel_def, const | |||
} | |||
if (workspace_data_sizes[0] < static_cast<int64_t>(kernel_def.task_info_size())) { | |||
GELOGE(FAILED, "Node:%s workspace size is %zu, task info size is %zu.", op_desc->GetName().c_str(), | |||
GELOGE(FAILED, "Node:%s workspace size is %ld, task info size is %d.", op_desc->GetName().c_str(), | |||
workspace_data_sizes[0], kernel_def.task_info_size()); | |||
return FAILED; | |||
} | |||
@@ -31,6 +31,7 @@ | |||
#include "runtime/kernel.h" | |||
#include "super_kernel/super_kernel.h" | |||
#include "super_kernel/super_kernel_factory.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
namespace { | |||
const uint8_t kL2LoadToDdr = 1; | |||
@@ -73,7 +74,8 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||
GELOGD("node[%s] is_n_batch_spilt %d", op_desc_->GetName().c_str(), is_n_batch_spilt_); | |||
(void)AttrUtils::GetInt(*op_desc_, ATTR_NAME_FUSION_GROUP_KEY, group_key_); | |||
has_group_key_ = (group_key_ != kInvalidGroupKey); | |||
GELOGD("node[%s] has_group_key_ %ld, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_); | |||
GELOGD("node[%s] has_group_key_ %d, group key is [%ld]", op_desc_->GetName().c_str(), has_group_key_, group_key_); | |||
// fusion_op_info | |||
vector<std::string> original_op_names; | |||
bool result = AttrUtils::GetListStr(op_desc_, ge::ATTR_NAME_DATA_DUMP_ORIGIN_OP_NAMES, original_op_names); | |||
@@ -176,7 +178,7 @@ void KernelTaskInfo::UpdateTaskId() { | |||
} | |||
task_id_ = task_id; | |||
stream_id_ = stream_id; | |||
GELOGI("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id); | |||
GELOGD("UpdateTaskId:UpdateTaskId [%u], stream id [%u]:", task_id, stream_id); | |||
} | |||
} | |||
@@ -216,7 +218,7 @@ Status KernelTaskInfo::SuperKernelLaunch() { | |||
rtError_t rt_ret; | |||
auto &skt_kernel_list = skt_info_.kernel_list; | |||
auto &skt_arg_list = skt_info_.arg_list; | |||
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%d] skt_arg_list[%d]", skt_kernel_list.size(), skt_arg_list.size()); | |||
GELOGI("SuperKernelLaunch: Skt_kernel_list size[%zu] skt_arg_list[%zu]", skt_kernel_list.size(), skt_arg_list.size()); | |||
if (skt_kernel_list.size() == kSKTSingleSize && skt_arg_list.size() == kSKTSingleSize) { | |||
rt_ret = rtKernelLaunchWithFlag(skt_info_.kernel_list[0], static_cast<uint32_t>(skt_info_.last_block_dim), | |||
skt_info_.arg_list[0], skt_info_.last_args_size, | |||
@@ -367,8 +369,9 @@ Status KernelTaskInfo::Distribute() { | |||
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | |||
} | |||
rtError_t rt_ret = RT_ERROR_NONE; | |||
char *skt_enable_env = getenv("SKT_ENABLE"); | |||
int64_t env_flag = (skt_enable_env != nullptr) ? strtol(skt_enable_env, nullptr, 10) : 0; | |||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||
int64_t env_flag = (res == EN_OK) ? strtol(skt_enable_env, nullptr, 10) : 0; | |||
bool call_skt = ((env_flag != 0) || is_l1_fusion_enable_); | |||
if (kernel_type_ == cce::ccKernelType::AI_CPU || kernel_type_ == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGI("distribute task info kernel_type %d, flag %d", kernel_type_, dump_flag_); | |||
@@ -379,7 +382,7 @@ Status KernelTaskInfo::Distribute() { | |||
call_save_dump_ = true; | |||
} else { | |||
/* default: not skt launch */ | |||
GELOGI( | |||
GELOGD( | |||
"KernelTaskInfo Distribute Start, sktenable:%d taskid:%u sktid:%u last_sktid:%u stubfunc_name:%s " | |||
"stubfunc:%p blockdim:%u stream:%p", | |||
call_skt, task_id_, skt_id_, skt_info_.last_task_id, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | |||
@@ -406,7 +409,7 @@ Status KernelTaskInfo::Distribute() { | |||
} | |||
// set for task_id_ | |||
UpdateTaskId(); | |||
GELOGI( | |||
GELOGD( | |||
"KernelTaskInfo Distribute Success. sktenable:%d taskid:%d sktid:%d stubfunc_name:%s stubfunc:%p " | |||
"blockdim:%d stream:%p", | |||
call_skt, task_id_, skt_id_, stub_func_name_.c_str(), stub_func_, block_dim_, stream_); | |||
@@ -747,15 +750,15 @@ Status KernelTaskInfo::InitAICPUCustomTask(uint32_t op_index, const domi::Kernel | |||
} | |||
} | |||
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[0])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0 | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_descs)); // arg 0 | |||
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[1])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1 | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.input_addrs)); // arg 1 | |||
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[2])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2 | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_descs)); // arg 2 | |||
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[3])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3 | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.output_addrs)); // arg 3 | |||
*(reinterpret_cast<uint64_t *>(args + ctx_.argsOffset[4])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4 | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(custom_info_.attr_handle)); // arg 4 | |||
rt_ret = rtMalloc(&args_, args_size_, RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
@@ -913,7 +916,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||
aicpu_param_head->extInfoLength = reinterpret_cast<uintptr_t>(ext_info.size()); | |||
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||
// malloc device memory for args | |||
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | |||
@@ -956,12 +959,40 @@ Status KernelTaskInfo::InitAicpuTaskExtInfo(const std::string &ext_info) { | |||
if (ext_info.empty()) { | |||
return SUCCESS; | |||
} | |||
std::unique_ptr<uint8_t[]> copy_ext_info; | |||
copy_ext_info.reset(new(std::nothrow)uint8_t[ext_info.size()]); | |||
GE_CHECK_NOTNULL(copy_ext_info); | |||
auto sec_ret = memcpy_s(copy_ext_info.get(), ext_info.size(), ext_info.c_str(), ext_info.size()); | |||
if (sec_ret != EOK) { | |||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||
return FAILED; | |||
} | |||
auto ext_info_data = copy_ext_info.get(); | |||
size_t offset = 0; | |||
while (offset + sizeof(aicpu::FWKAdapter::ExtInfo) <= ext_info.size()) { | |||
auto aicpu_ext_info = reinterpret_cast<aicpu::FWKAdapter::ExtInfo *>(ext_info_data + offset); | |||
GELOGD("Ext infoType=%d, infoLen=%u.", aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
if (aicpu_ext_info->infoType == aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO) { | |||
GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(SessionInfo), PARAM_INVALID, | |||
"Parse ext session info failed as infoLen must be %zu but %u.", | |||
sizeof(SessionInfo), aicpu_ext_info->infoLen); | |||
SessionInfo *session_info = reinterpret_cast<SessionInfo *>(aicpu_ext_info->infoMsg); | |||
session_info->sessionId = davinci_model_->GetSessionId(); | |||
session_info->sessFlag = true; | |||
GELOGD("Update aicpu_task ext_info session_info session_id is %lu", session_info->sessionId); | |||
} | |||
offset += sizeof(aicpu::FWKAdapter::ExtInfo); | |||
offset += aicpu_ext_info->infoLen; | |||
} | |||
auto rt_ret = rtMalloc(&aicpu_ext_info_addr_, ext_info.size(), RT_MEMORY_HBM); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMalloc ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
} | |||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info.c_str(), ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
rt_ret = rtMemcpy(aicpu_ext_info_addr_, ext_info.size(), ext_info_data, ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGE(RT_FAILED, "rtMemcpy ext_info error: 0x%X, size=%zu", rt_ret, ext_info.size()); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||
@@ -1122,18 +1153,24 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u | |||
} | |||
GELOGI("FileName:%s, Path:%s.", file_name.c_str(), canonicalPath.c_str()); | |||
auto handle = dlopen(canonicalPath.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
auto handle = mmDlopen(canonicalPath.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
const char *error = ""; | |||
if (handle == nullptr) { | |||
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); | |||
error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", error); | |||
return FAILED; | |||
} | |||
cce::ccStatus_t cc_ret; | |||
std::string update_kernel_args = "ccUpdateKernelArgs"; | |||
auto cceUpdateKernelArgs = (cce::ccStatus_t(*)(cce::ccOpContext &, uint64_t, uint64_t, uint64_t, void *, uint64_t, | |||
void *))dlsym(handle, "ccUpdateKernelArgs"); | |||
void *))mmDlsym(handle, const_cast<char *>(update_kernel_args.c_str())); | |||
if (cceUpdateKernelArgs == nullptr) { | |||
GELOGE(FAILED, "Failed to invoke function ccUpdateKernelArgs"); | |||
if (dlclose(handle) != 0) { | |||
GELOGW("Failed to close handle %s", dlerror()); | |||
if (mmDlclose(handle) != 0) { | |||
error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to close handle %s", error); | |||
} | |||
return FAILED; | |||
} else { | |||
@@ -1146,8 +1183,10 @@ Status KernelTaskInfo::CceUpdateKernelArgs(const domi::KernelContext &context, u | |||
const_cast<char *>(kernel_def.args().data()), args_size_, sm_contrl); | |||
} | |||
} | |||
if (dlclose(handle) != 0) { | |||
GELOGW("Failed to close handle %s", dlerror()); | |||
if (mmDlclose(handle) != 0) { | |||
error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("Failed to close handle %s", error); | |||
return FAILED; | |||
} | |||
if (cc_ret != cce::CC_STATUS_SUCCESS) { | |||
@@ -1188,7 +1227,7 @@ Status KernelTaskInfo::SetFlowtable(std::string &flowtable, const domi::KernelDe | |||
*(reinterpret_cast<uint64_t *>( | |||
args + (reinterpret_cast<uint16_t *>(const_cast<char *>(context.args_offset().data())))[0])) = | |||
reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_)); | |||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(flowtable_)); | |||
} | |||
return SUCCESS; | |||
} | |||
@@ -23,7 +23,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) { | |||
const void *func_stub_ = this->GetFuncStub(); | |||
const void *args[] = {this->GetNavTablePtr(), | |||
reinterpret_cast<const void *>(reinterpret_cast<uintptr_t>(this->GetNavTableSize()))}; | |||
reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))}; | |||
rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return | |||
@@ -27,7 +27,7 @@ SuperKernelFactory &SuperKernelFactory::GetInstance() { | |||
Status SuperKernelFactory::Init() { | |||
if (!is_init_) { | |||
std::string skt_bin = "libcce_aicore.so"; | |||
handle_ = dlopen(skt_bin.c_str(), RTLD_NOW | RTLD_GLOBAL); | |||
handle_ = mmDlopen(skt_bin.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL); | |||
if (handle_ == nullptr) { | |||
GELOGE(FAILED, "SKT: open skt lib failed, please check LD_LIBRARY_PATH."); | |||
} | |||
@@ -85,8 +85,10 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||
"equal to 2"); | |||
return FAILED; | |||
} | |||
GELOGI("SKT: superkernel start fuse, superkernel size %d.", stub_func_list.size()); | |||
uint64_t nav_table[2 * stub_func_list.size()]; | |||
GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size()); | |||
const size_t nav_table_len = 2 * stub_func_list.size(); | |||
std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]); | |||
GE_CHECK_NOTNULL(nav_table); | |||
uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t); | |||
rtError_t rt_ret; | |||
@@ -99,16 +101,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list | |||
GELOGD("SKT: fuseKernels subFunc %p, device func address %p", stub_func_list[i], sub_device_func); | |||
// store two uint64_t address | |||
// address divided by 4 because of 32bits encoding, call offset will *4 when calculating | |||
nav_table[i * 2] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||
nav_table[i * 2] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sub_device_func)) / 4; | |||
GELOGD("SKT: CALL offet %lu", nav_table[i * 2]); | |||
nav_table[i * 2 + 1] = reinterpret_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||
nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i])); | |||
GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]); | |||
} | |||
rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret); | |||
return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
rt_ret = | |||
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table, nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE); | |||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret); | |||
GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) | |||
// Create the necessary metadata for the super kernel | |||
@@ -34,8 +34,10 @@ class SuperKernelFactory { | |||
~SuperKernelFactory() { | |||
if (handle_ != nullptr) { | |||
GELOGI("SKT: SKT LIB PATH release."); | |||
if (dlclose(handle_) != 0) { | |||
GELOGW("failed to close handle, message: %s", dlerror()); | |||
if (mmDlclose(handle_) != 0) { | |||
const char *error = mmDlerror(); | |||
GE_IF_BOOL_EXEC(error == nullptr, error = ""); | |||
GELOGW("failed to close handle, message: %s", error); | |||
} | |||
} | |||
}; | |||
@@ -30,49 +30,37 @@ ZeroCopyOffset::ZeroCopyOffset() {} | |||
ZeroCopyOffset::~ZeroCopyOffset() {} | |||
Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list, | |||
const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc, | |||
Status ZeroCopyOffset::InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, | |||
bool &fusion_flag) { | |||
GELOGI("[ZCPY] Start to InitInputDataInfo of %s, total_data_size is %ld, virtual_addr is %p", | |||
op_desc->GetName().c_str(), output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); | |||
if (output_size_list.empty() || virtual_addr_list.empty() || (output_size_list.size() != virtual_addr_list.size())) { | |||
GELOGE(PARAM_INVALID, "Data[%s] init failed: Output size is %zu, Output addr is %zu", op_desc->GetName().c_str(), | |||
output_size_list.size(), virtual_addr_list.size()); | |||
return PARAM_INVALID; | |||
} | |||
basic_addr_ = virtual_addr_list[kDataIndex]; | |||
op_desc->GetName().c_str(), output_size, virtual_addr); | |||
basic_addr_ = virtual_addr; | |||
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | |||
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_RELATIVE_OFFSET, zero_copy_relative_offset_); | |||
GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | |||
"basic_offset_size should be equal to relative_offset_size"); | |||
GELOGI("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size()); | |||
GELOGD("[ZCPY] zero_copy_basic_offset size is %zu", zero_copy_basic_offset_.size()); | |||
int64_t virtual_addr_offset = op_desc->GetOutputOffset().at(kDataIndex); | |||
GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); | |||
IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | |||
uint32_t out_count = 0; | |||
data_size_ = output_size_list[kDataIndex]; | |||
data_size_ = output_size; | |||
if (!fusion_flag) { | |||
GELOGI("[ZCPY] %s not set l2_fusion.", op_desc->GetName().c_str()); | |||
out_count++; | |||
data_info_.emplace_back(output_size_list[kDataIndex], virtual_addr_list[kDataIndex]); | |||
data_info_.emplace_back(output_size, virtual_addr); | |||
relative_offset_.emplace_back(0); | |||
GELOGI("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size_list[kDataIndex], | |||
virtual_addr_list[kDataIndex]); | |||
GELOGD("[ZCPY] %s size is %ld, virtual_addr is %p.", op_desc->GetName().c_str(), output_size, virtual_addr); | |||
} else { | |||
GELOGI("[ZCPY] set l2_fusion for %s.", op_desc->GetName().c_str()); | |||
for (size_t index = 0; index < zero_copy_basic_offset_.size(); ++index) { | |||
if (zero_copy_basic_offset_.at(index) == virtual_addr_offset) { | |||
out_count++; | |||
uint64_t out_offset = | |||
reinterpret_cast<uint64_t>(virtual_addr_list[kDataIndex]) + zero_copy_relative_offset_.at(index); | |||
int64_t real_data_size = ModelUtils::GetOutputSize(op_desc).at(kDataIndex); | |||
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_offset))); | |||
uint64_t out_offset = reinterpret_cast<uint64_t>(virtual_addr) + zero_copy_relative_offset_.at(index); | |||
data_info_.emplace_back(output_size, reinterpret_cast<void *>(static_cast<uintptr_t>(out_offset))); | |||
relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | |||
GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion to %lu, need copy data_size is %ld.", basic_addr_, | |||
out_offset, real_data_size); | |||
out_offset, output_size); | |||
} | |||
} | |||
} | |||
@@ -83,7 +71,6 @@ Status ZeroCopyOffset::InitInputDataInfo(const vector<int64_t> &output_size_list | |||
Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list, | |||
const vector<void *> &virtual_addr_list, const OpDescPtr &op_desc, | |||
const size_t &idx, bool &fusion_flag) { | |||
GELOGI("[ZCPY] Start to InitOutputDataInfo of %s.", op_desc->GetName().c_str()); | |||
int64_t size = input_size_list[idx]; | |||
auto tensor_desc = op_desc->GetInputDescPtr(idx); | |||
GE_CHECK_NOTNULL(tensor_desc); | |||
@@ -92,7 +79,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||
return FAILED; | |||
} | |||
GELOGI("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); | |||
GELOGD("Tensor data size: GetSize=%ld, GetTensorSizeInBytes=%ld", input_size_list[idx], size); | |||
basic_addr_ = virtual_addr_list[idx]; | |||
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_ZERO_COPY_BASIC_OFFSET, zero_copy_basic_offset_); | |||
@@ -100,13 +87,11 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||
GE_CHK_BOOL_EXEC(zero_copy_basic_offset_.size() == zero_copy_relative_offset_.size(), return PARAM_INVALID, | |||
"basic_offset_size should be equal to relative_offset_size"); | |||
int64_t virtual_addr_offset = op_desc->GetInputOffset().at(idx); | |||
GELOGI("virtual_addr_offset is %ld.", virtual_addr_offset); | |||
IsL2Fusion(zero_copy_basic_offset_, virtual_addr_offset, fusion_flag); | |||
uint32_t in_count = 0; | |||
data_size_ = size; | |||
if (!fusion_flag) { | |||
GELOGI("[ZCPY] %s not set l2-fusion.", op_desc->GetName().c_str()); | |||
in_count++; | |||
data_info_.emplace_back(size, virtual_addr_list[idx]); | |||
// op_desc not set l2fusion when fusion_flag is false | |||
@@ -119,7 +104,7 @@ Status ZeroCopyOffset::InitOutputDataInfo(const vector<int64_t> &input_size_list | |||
in_count++; | |||
uint64_t in_offset = reinterpret_cast<uint64_t>(virtual_addr_list[idx]) + zero_copy_relative_offset_.at(index); | |||
int64_t real_data_size = ModelUtils::GetInputSize(op_desc).at(idx); | |||
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(in_offset))); | |||
data_info_.emplace_back(real_data_size, reinterpret_cast<void *>(static_cast<uintptr_t>(in_offset))); | |||
relative_offset_.emplace_back(zero_copy_relative_offset_.at(index)); | |||
GELOGI("[ZCPY] virtual_addr: %p has been l2-fusion from %lu, need copy data_size is %ld.", basic_addr_, | |||
in_offset, real_data_size); | |||
@@ -142,10 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const | |||
void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||
bool fusion_flag, std::set<const void *> &real_virtual_addrs) { | |||
GELOGI("[ZCPY] Start to SetInputOutsideAddrs for virtual_addr %p.", addr); | |||
uint32_t out_count = 0; | |||
if (!fusion_flag) { | |||
GELOGI("[ZCPY] not set l2-fusion for virtual_adr %p.", addr); | |||
out_count++; | |||
std::map<const void *, std::vector<void *>> addr_mapping; | |||
addr_mapping[addr] = {}; | |||
@@ -175,7 +158,6 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||
GELOGI("[ZCPY] Start to SetOutputOutsideAddrs for virtual_addr %p.", addr); | |||
uint32_t out_count = 0; | |||
if (!fusion_flag) { | |||
GELOGI("[ZCPY] not set l2-fusion for virtual_addr %p.", addr); | |||
out_count++; | |||
std::map<const void *, std::vector<void *>> addr_mapping; | |||
addr_mapping[addr] = {}; | |||
@@ -209,7 +191,7 @@ bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *ou | |||
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | |||
void *args_val = static_cast<uint8_t *>(args) + offset; | |||
args_addrs->second.push_back(args_val); | |||
GELOGI("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | |||
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | |||
args, offset); | |||
set_batch_label_flag = true; | |||
} | |||
@@ -42,8 +42,7 @@ class ZeroCopyOffset { | |||
ZeroCopyOffset(); | |||
~ZeroCopyOffset(); | |||
Status InitInputDataInfo(const vector<int64_t> &output_size_list, const vector<void *> &virtual_addr_list, | |||
const OpDescPtr &op_desc, bool &fusion_flag); | |||
Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); | |||
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, | |||
bool fusion_flag, std::set<const void *> &real_virtual_addrs); | |||
@@ -19,6 +19,7 @@ | |||
#include "framework/common/debug/ge_log.h" | |||
#include "framework/common/util.h" | |||
#include "graph/load/new_model_manager/model_utils.h" | |||
#include "common/ge_compiler_options.h" | |||
namespace ge { | |||
const char *const kDefaultBatchLable = "Batch_default"; | |||
@@ -48,7 +49,7 @@ Status ZeroCopyTask::SetTaskArgsOffset(uintptr_t addr, size_t offset) { | |||
it->second.insert(offset); | |||
} | |||
GELOGI("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, | |||
GELOGD("[ZCPY] %s set task, virtual_addr: 0x%lx, args_addr: %p, size: %zu, offset: %zu", name_.c_str(), addr, | |||
args_addr_, args_size_, offset); | |||
return SUCCESS; | |||
} | |||
@@ -157,7 +158,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||
rt_err = rtMemcpyAsync(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE_EX, | |||
stream); | |||
} else { | |||
__builtin_prefetch(args_addr_); | |||
GE_BUILTIN_PREFETCH(args_addr_); | |||
rt_err = rtMemcpy(args_addr_, args_size_, args_info_.data(), args_info_.size(), RT_MEMCPY_HOST_TO_DEVICE); | |||
} | |||
@@ -166,7 +167,7 @@ Status ZeroCopyTask::DistributeParam(bool async_mode, rtStream_t stream) { | |||
return RT_ERROR_TO_GE_STATUS(rt_err); | |||
} | |||
GELOGI("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), | |||
GELOGD("[ZCPY] %s refresh task args success, args_addr: %p, size: %zu, args_info_: %p, length: %zu", name_.c_str(), | |||
args_addr_, args_size_, args_info_.data(), args_info_.size()); | |||
return SUCCESS; | |||
} | |||
@@ -363,7 +363,7 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
for (auto &subgraph : compute_graph->GetAllSubgraphs()) { | |||
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | |||
} | |||
GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
} | |||
GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | |||
@@ -396,8 +396,6 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph, | |||
stages.builder.SetOptions(options_); | |||
var_acc_ctrl_.AddGraph(graph_id, compute_graph); | |||
GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); | |||
return SUCCESS; | |||
} | |||
@@ -435,7 +433,7 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||
for (auto &subgraph : new_compute_graph->GetAllSubgraphs()) { | |||
(void)AttrUtils::SetStr(*subgraph, ATTR_NAME_SESSION_GRAPH_ID, session_graph_id); | |||
} | |||
GELOGW("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
GELOGD("Get graph session_graph_id attr failed, set session id to default value: [0]"); | |||
} | |||
GraphNodePtr graph_node = MakeShared<ge::GraphNode>(graph_id); | |||
@@ -468,8 +466,6 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap | |||
stages.builder.SetOptions(options_); | |||
var_acc_ctrl_.AddGraph(graph_id, new_compute_graph); | |||
GELOGI("[GraphManager] add graph success, graph_id = %u.", graph_id); | |||
return SUCCESS; | |||
} | |||
@@ -546,7 +542,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||
const auto &root_subgraph_list = sub_graph_map[compute_graph]; | |||
std::string op_compile_strategy; | |||
(void)AttrUtils::GetStr(compute_graph, ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | |||
GELOGI("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); | |||
GELOGD("OptimizeSubGraphWithMultiThreads Process op_compile_strategy:%s", op_compile_strategy.c_str()); | |||
for (const auto &subgraph : root_subgraph_list) { | |||
if (!op_compile_strategy.empty()) { | |||
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | |||
@@ -576,7 +572,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||
vector_future.emplace_back(std::move(f)); | |||
} | |||
} | |||
GELOGI("All sub graph num is %zu", vector_future.size()); | |||
GELOGD("All sub graph num is %zu", vector_future.size()); | |||
for (size_t i = 0; i < vector_future.size(); ++i) { | |||
Status ret_status = vector_future[i].get(); | |||
if (ret_status != SUCCESS) { | |||
@@ -700,7 +696,7 @@ Status GraphManager::SetSubgraph(uint64_t session_id, ComputeGraphPtr compute_gr | |||
/// Multiply optimize subgraph: | |||
/// 1. run lx buffer while build_mode is normal and buffer_optimize is empty or "off_optimize"; | |||
/// 2. run lx fusion or buffer according build_mode and build_step in fe. | |||
GELOGI("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", | |||
GELOGD("Directly optimize subgraph with build mode:%s, and step:%s, buffer_optimize:%s.", | |||
options_.build_mode.c_str(), | |||
options_.build_step.c_str(), | |||
buffer_optimize.c_str()); | |||
@@ -747,7 +743,7 @@ Status GraphManager::PreRunOptimizeOriginalGraph(const GraphNodePtr &graph_node, | |||
GE_CHK_STATUS_RET(graph_pass.Run(compute_graph)); | |||
GE_CHK_STATUS_RET(stages.optimizer.IdentifyReference(compute_graph), "Identify reference failed."); | |||
GELOGI("PreRun:PreRunOptimizeOriginalGraph success."); | |||
GELOGD("PreRun:PreRunOptimizeOriginalGraph success."); | |||
return SUCCESS; | |||
} | |||
@@ -762,10 +758,10 @@ Status GraphManager::PreRunOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
if (options_.build_mode == BUILD_MODE_TUNING && options_.build_step == BUILD_STEP_AFTER_UB_MATCH) { | |||
std::string tuning_path; | |||
(void) GetContext().GetOption(TUNING_PATH, tuning_path); | |||
GELOGI("Dump path:%s.", tuning_path.c_str()); | |||
GELOGD("Dump path:%s.", tuning_path.c_str()); | |||
GraphUtils::DumpGEGraph(compute_graph, "", true, tuning_path); | |||
} | |||
GELOGI("PreRun:PreRunOptimizeSubGraph success."); | |||
GELOGD("PreRun:PreRunOptimizeSubGraph success."); | |||
return SUCCESS; | |||
} | |||
@@ -785,12 +781,12 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node, | |||
} | |||
GM_RUN_AND_DUMP_PERF("Build", Build, graph_node, compute_graph, ge_root_model, session_id); | |||
GELOGI("PreRun:PreRunAfterOptimizeSubGraph success."); | |||
GELOGD("PreRun:PreRunAfterOptimizeSubGraph success."); | |||
return SUCCESS; | |||
} | |||
Status GraphManager::SetRtContext(rtContext_t rt_context, rtCtxMode_t mode, uint64_t session_id, uint32_t graph_id) { | |||
GELOGI("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||
GELOGD("set rt_context, session id: %lu, graph id: %u, mode %d, device id:%u.", session_id, graph_id, | |||
static_cast<int>(mode), ge::GetContext().DeviceId()); | |||
rtError_t rt_ret = rtCtxCreate(&rt_context, mode, ge::GetContext().DeviceId()); | |||
@@ -1251,7 +1247,7 @@ Status GraphManager::BuildGraphForUnregisteredOp(const GraphId &graph_id, const | |||
Status GraphManager::BuildGraph(const GraphId &graph_id, const std::vector<GeTensor> &inputs, | |||
GeRootModelPtr &ge_root_model, uint64_t session_id, bool async) { | |||
GELOGI("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||
GELOGD("[BuildGraph] start to build graph, graph_id=%u.", graph_id); | |||
if (inputs.empty()) { | |||
GELOGW("[BuildGraph] BuildGraph warning: empty GeTensor inputs"); | |||
} | |||
@@ -1531,7 +1527,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||
// Set save_original_model flag (ge.save_original_model) | |||
ParseOption(options, SAVE_ORIGINAL_MODEL, options_.save_original_model); | |||
GELOGI("Set save original model flag %s", options_.save_original_model.c_str()); | |||
// Original model file name | |||
ParseOption(options, ORIGINAL_MODEL_FILE, options_.original_model_file); | |||
@@ -1540,16 +1535,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti | |||
ParseOption(options, DYNAMIC_NODE_TYPE, options_.dynamic_node_type); | |||
GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d.", | |||
options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type); | |||
if ((!options_.input_shape.empty() && options_.dynamic_dims.empty()) || | |||
(options_.input_shape.empty() && !options_.dynamic_dims.empty())) { | |||
GELOGE(GRAPH_PARAM_INVALID, "Should set input shape and dynamic dims at the same time"); | |||
return GRAPH_PARAM_INVALID; | |||
} | |||
if ((!options_.input_shape.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType) || | |||
(!options_.dynamic_dims.empty() && options_.dynamic_node_type == kInvalidDynaimcDimsType)) { | |||
GELOGE(GRAPH_PARAM_INVALID, "Should set valid dynamic node type"); | |||
return GRAPH_PARAM_INVALID; | |||
} | |||
// Set Build model and step | |||
ParseOption(options, BUILD_MODE, options_.build_mode); | |||
@@ -2252,7 +2237,7 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) { | |||
} | |||
Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { | |||
GELOGI("Start optimize after merge sub graph."); | |||
GELOGD("Start optimize after merge sub graph."); | |||
PassManager after_merge_passes; | |||
GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage2::AfterMergePasses::LinkGenMaskNodesPass", | |||
@@ -2502,7 +2487,7 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||
ComputeGraphPtr compute_graph_tmp = sub_graph_info_ptr->GetSubGraph(); | |||
const std::string &engine_name = sub_graph_info_ptr->GetEngineName(); | |||
GELOGI("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", | |||
GELOGD("ProcessSubGraphWithMultiThreads start, graph name is %s, engine_name is %s, thread id is %lu", | |||
compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | |||
pthread_self()); | |||
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphBefore"); | |||
@@ -2514,11 +2499,11 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||
GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); | |||
return ret; | |||
} else { | |||
GELOGI("SubGraph optimize success %s", engine_name.c_str()); | |||
GELOGD("SubGraph optimize success %s", engine_name.c_str()); | |||
} | |||
GE_DUMP(compute_graph_tmp, "OptimizeSubGraphAfter"); | |||
sub_graph_info_ptr->SetSubGraph(compute_graph_tmp); | |||
GELOGI("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", | |||
GELOGD("ProcessSubGraphWithMultiThreads end, graph name is %s, engine_name is %s, thread id is %lu", | |||
compute_graph_tmp != nullptr ? compute_graph_tmp->GetName().c_str() : "", engine_name.c_str(), | |||
pthread_self()); | |||
} else { | |||
@@ -2849,13 +2834,15 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
if (args.graph_node->graph_run_async_listener_ != nullptr) { | |||
args.graph_node->graph_run_async_listener_->SetCallback(args.callback); | |||
} | |||
Status ret; | |||
// parse inputs.dims to vector<vector<uint64_t>> dynamic_dims | |||
if (graph_manager->ParseInputsDims(args.input_tensor) != SUCCESS) { | |||
GELOGE(PARAM_INVALID, "Parse input dims failed."); | |||
ret = graph_manager->ParseInputsDims(args.input_tensor); | |||
if (ret != SUCCESS) { | |||
ReturnError(graph_manager, args.callback, ret, "ParseInputsDims failed, thread exit."); | |||
args.graph_node->Unlock(); | |||
return; | |||
} | |||
Status ret; | |||
if (!args.graph_node->GetLoadFlag()) { | |||
ret = graph_manager->LoadGraphAsync(args.ge_root_model, args.graph_node); | |||
if (ret != SUCCESS || args.ge_root_model == nullptr) { | |||
@@ -2880,12 +2867,12 @@ void GraphManager::RunThread(GraphManager *graph_manager) { | |||
ret = graph_manager->graph_executor_.ExecuteGraphAsync(args.graph_id, args.graph_node->GetGeRootModel(), | |||
args.input_tensor); | |||
args.graph_node->SetRunFlag(false); | |||
args.graph_node->Unlock(); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "[GraphManager] Run graph async failed, graph_id=%u.", args.graph_id); | |||
StopQueue(graph_manager); | |||
ReturnError(graph_manager, args.callback, ret, "ExecuteGraphAsync failed, thread exit."); | |||
args.graph_node->Unlock(); | |||
return; | |||
} | |||
args.graph_node->Unlock(); | |||
GELOGI("[GraphManager] Run graph async success, graph_id=%u.", args.graph_id); | |||
} | |||
} | |||
@@ -92,13 +92,13 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen | |||
GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str()); | |||
if (var_addr_mgr_map_.count(var_key) == 0) { | |||
uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() + | |||
reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(), | |||
TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(), | |||
TypeUtils::DataTypeToSerialString(tensor_desc.GetDataType()).c_str()); | |||
VarAddrMgr var_addr_mgr; | |||
var_addr_mgr.address = reinterpret_cast<uint8_t *>(reinterpret_cast<std::uintptr_t>(logic_address)); | |||
var_addr_mgr.offset = reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
var_addr_mgr.address = reinterpret_cast<uint8_t *>(static_cast<std::uintptr_t>(logic_address)); | |||
var_addr_mgr.offset = static_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address)); | |||
var_addr_mgr.tensor_desc = tensor_desc; | |||
var_addr_mgr.memory_type = memory_type; | |||
var_addr_mgr_map_[var_key] = var_addr_mgr; | |||
@@ -510,7 +510,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||
} | |||
result = var_resource_->SaveVarAddr( | |||
var_name, tensor_desc, reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||
var_name, tensor_desc, reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||
if (result != SUCCESS) { | |||
GELOGE(ge::INTERNAL_ERROR, "AssignVarMem by offset failed."); | |||
return ge::INTERNAL_ERROR; | |||
@@ -527,7 +527,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||
result = var_resource_->GetCurVarDesc(var_name, cur_tensor_desc); | |||
if (result != SUCCESS) { | |||
var_resource_->SetVarAddr(var_name, tensor_desc, | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||
return SUCCESS; | |||
} | |||
@@ -542,7 +542,7 @@ ge::Status VarManager::AssignVarMem(const std::string &var_name, const ge::GeTen | |||
ge::TypeUtils::FormatToSerialString(cur_tensor_desc.GetFormat()).c_str(), | |||
cur_tensor_desc.GetShape().GetDims().size()); | |||
var_resource_->SetVarAddr(var_name, tensor_desc, | |||
reinterpret_cast<uint8_t *>(reinterpret_cast<uintptr_t>(mem_offset)), memory_type); | |||
reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(mem_offset)), memory_type); | |||
} | |||
return SUCCESS; | |||
@@ -642,7 +642,7 @@ ge::Status VarManager::SyncBroadCastData2Var(uint32_t graph_id, const std::strin | |||
bool VarManager::IsVarAddr(const int64_t &offset) { | |||
std::lock_guard<std::recursive_mutex> lock(mutex_); | |||
if (var_resource_ == nullptr) { | |||
GELOGW("VarManager has not been init."); | |||
GELOGD("VarManager has not been init."); | |||
return false; | |||
} | |||
return var_resource_->IsVarAddr(offset); | |||
@@ -374,7 +374,7 @@ Status TransVarDataUtils::SyncVarData2BroadCast(const string &var_name, const ge | |||
GE_MAKE_GUARD_RTMEM(src_host_addr); | |||
GE_CHK_STATUS_RET(SyncTensorToHost(var_name, src_tensor_desc, &src_host_addr, src_addr_size, session_id)); | |||
GELOGI("src_addr_size: %u, dst_addr_size: %u", src_addr_size, dst_addr_size); | |||
GELOGI("src_addr_size: %ld, dst_addr_size: %ld", src_addr_size, dst_addr_size); | |||
GE_CHK_BOOL_RET_STATUS(src_addr_size == dst_addr_size, FAILED, "var data size is not equal broadcast "); | |||
GE_CHK_RT_RET(rtMemcpy(dst_addr, dst_addr_size, src_host_addr, src_addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
@@ -403,7 +403,7 @@ Status TransVarDataUtils::SyncTensorToHost(const string &var_name, const ge::GeT | |||
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, src_tensor_desc, &src_addr)); | |||
uint8_t *mem_addr = | |||
src_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
GE_CHK_RT_RET(rtMallocHost(reinterpret_cast<void **>(host_addr), src_tensor_size)); | |||
@@ -420,7 +420,7 @@ Status TransVarDataUtils::SyncTensorToDevice(const string &var_name, const uint8 | |||
GE_CHK_STATUS_RET(VarManager::Instance(session_id)->GetVarAddr(var_name, dst_tensor_desc, &dst_addr)); | |||
uint8_t *mem_addr = | |||
dst_addr - | |||
static_cast<int64_t>(reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>(static_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemLogicBase())) + | |||
static_cast<int64_t>( | |||
reinterpret_cast<uintptr_t>(VarManager::Instance(session_id)->GetVarMemoryBase(RT_MEMORY_HBM))); | |||
GE_CHK_RT_RET(rtMemcpy(mem_addr, addr_size, host_addr, addr_size, RT_MEMCPY_HOST_TO_DEVICE)); | |||
@@ -501,7 +501,7 @@ Status TransVarDataUtils::TransAllVarData(const vector<NodePtr> &variable_nodes, | |||
} | |||
Status TransVarDataUtils::CopyVarData(const ComputeGraphPtr &compute_graph, uint64_t session_id, uint32_t device_id) { | |||
GELOGI("CopyVarData start: session_id:%lu.", session_id); | |||
GELOGD("CopyVarData start: session_id:%lu.", session_id); | |||
if (compute_graph == nullptr) { | |||
GELOGE(FAILED, "compute_graph is nullptr"); | |||
return FAILED; | |||
@@ -32,7 +32,7 @@ Debug::~Debug() = default; | |||
void Debug::DumpProto(const Message &proto, const char *file) { | |||
std::string file_path = RealPath(file); | |||
int fd = open(file_path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); | |||
int fd = mmOpen2(file_path.c_str(), M_WRONLY | M_CREAT | O_TRUNC, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD | M_UMASK_OTHREAD); | |||
if (fd == -1) { | |||
GELOGW("Write %s failed", file_path.c_str()); | |||
return; | |||
@@ -40,7 +40,7 @@ void Debug::DumpProto(const Message &proto, const char *file) { | |||
auto output = ge::MakeShared<FileOutputStream>(fd); | |||
if (output == nullptr) { | |||
GELOGW("create output failed."); | |||
if (close(fd) != 0) { | |||
if (mmClose(fd) != 0) { | |||
GELOGW("close fd failed."); | |||
} | |||
return; | |||
@@ -49,7 +49,7 @@ void Debug::DumpProto(const Message &proto, const char *file) { | |||
if (!ret) { | |||
GELOGW("dump proto failed."); | |||
} | |||
if (close(fd) != 0) { | |||
if (mmClose(fd) != 0) { | |||
GELOGW("close fd failed."); | |||
} | |||
} | |||
@@ -17,7 +17,6 @@ | |||
#ifndef GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | |||
#define GE_GRAPH_MANAGER_UTIL_DEBUG_H_ | |||
#include <fcntl.h> | |||
#include <sys/stat.h> | |||
#include <sys/types.h> | |||
#include <time.h> | |||
@@ -25,7 +24,6 @@ | |||
#include <google/protobuf/io/coded_stream.h> | |||
#include <google/protobuf/io/zero_copy_stream_impl.h> | |||
#include <google/protobuf/text_format.h> | |||
#include <unistd.h> | |||
#include <algorithm> | |||
#include <fstream> | |||
#include <iosfwd> | |||
@@ -58,8 +58,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { | |||
for (auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
auto peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | |||
GE_IF_BOOL_EXEC( | |||
peer_out_anchor == nullptr, GELOGW("peer_out_anchor is nullptr! node: %s", node->GetName().c_str()); continue); | |||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | |||
ge::NodePtr src_node = peer_out_anchor->GetOwnerNode(); | |||
src_index_list = node_op_desc->GetSrcIndex(); | |||
@@ -242,11 +241,11 @@ Status GraphOptimize::OptimizeGraphBeforeBuildForRts(ComputeGraphPtr &compute_gr | |||
} | |||
auto graph_optimizer = instance_ptr->OpsKernelManagerObj().GetAllGraphOptimizerObjsByPriority(); | |||
GELOGI("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||
GELOGD("optimize by opskernel in graph optimize before build phase. num of graph_optimizer is %zu.", | |||
graph_optimizer.size()); | |||
Status ret = SUCCESS; | |||
string exclude_core_Type = (core_type_ == kVectorCore) ? kAicoreEngine : kVectorEngine; | |||
GELOGI("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||
GELOGD("[OptimizeGraphBeforeBuildForRts]: engine type will exclude: %s, core_type_: %s", | |||
exclude_core_Type.c_str(), core_type_.c_str()); | |||
if (graph_optimizer.size() != 0) { | |||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | |||
@@ -1,397 +0,0 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include "graph/optimize/optimizer/allreduce_fusion_pass.h" | |||
#include <string> | |||
#include "common/debug/log.h" | |||
#include "framework/common/debug/ge_log.h" | |||
#include "common/types.h" | |||
#include "common/util.h" | |||
#include "graph/anchor.h" | |||
#include "graph/node.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/utils/attr_utils.h" | |||
#include "graph/utils/graph_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
#include "graph/debug/ge_attr_define.h" | |||
#include "hccl/base.h" | |||
#include "hccl/hcom.h" | |||
namespace ge { | |||
/// Fuses the HcomAllReduce nodes of `graph` into fewer, larger allreduce nodes.
///
/// Steps:
///   1. Collect every direct node whose type is HCOMALLREDUCE and whose
///      HCOM_ATTR_FUSION attribute is > 0 (the attribute defaults to 1 when absent).
///   2. Ask HCCL (hcom_get_split_strategy) how to split the collected nodes into
///      segments. segmentIndex[i] is the inclusive index, in collection order, of
///      the last node belonging to segment i.
///   3. For every segment that holds more than one node, detach all nodes of the
///      segment from the graph, remove them, and add a single node (cloned from
///      the first node's OpDesc) that is re-linked to all former peers.
///   4. Chain consecutive fused nodes with a control edge.
///
/// @param graph  Graph to optimize; must not be null.
/// @return SUCCESS when the pass ran to completion, NOT_CHANGED when there is
///         nothing to fuse, FAILED when HCCL returns an unusable strategy, or the
///         status of the first failing graph operation.
Status AllReducePass::Run(ge::ComputeGraphPtr graph) {
  GELOGI("FusionAllReducePass: start");
  // The graph is dereferenced immediately below; reject a null graph explicitly.
  GE_CHECK_NOTNULL(graph);

  std::vector<NodePtr> fusionOps;
  std::vector<float> inputGradientSize;
  std::vector<float> inputGradientTime;

  static const float inputGradientSizeTemp = 0.0;
  static const float inputGradientTimeTemp = 0.0;

  // Collect the allreduce nodes that take part in fusion.
  for (auto nodePtr : graph->GetDirectNode()) {
    if (nullptr == nodePtr) {
      GELOGW("FusionAllReducePass: null node exists");
      continue;
    }

    ge::OpDescPtr opDescPtr = nodePtr->GetOpDesc();
    if (nullptr == opDescPtr) {
      GELOGW("FusionAllReducePass: desc of node %s is null", nodePtr->GetName().c_str());
      continue;
    }

    if (HCOMALLREDUCE == opDescPtr->GetType()) {
      // The op is allreduce; it is fused only when its fusion attribute is > 0.
      std::int64_t hcom_fusion = 1;
      if (!ge::AttrUtils::GetInt(opDescPtr, HCOM_ATTR_FUSION, hcom_fusion)) {
        GELOGW("FusionAllReducePass: not get hcom_fusion from opDescPtr "
               "by HCOM_ATTR_FUSION");
      }
      GELOGI("after GetInt, hcom_fusion is :%ld", hcom_fusion);
      if (hcom_fusion > 0) {
        fusionOps.push_back(nodePtr);
        inputGradientSize.push_back(inputGradientSizeTemp);
        inputGradientTime.push_back(inputGradientTimeTemp);
      }
    }
  }

  // The number of allreduce operators must be more than 1.
  if (1 >= fusionOps.size()) {
    // size() is a size_t, so it is printed with %zu.
    GELOGW("FusionAllReducePass NOT_CHANGED: the graph has "
           "%zu allreduce operator",
           fusionOps.size());
    return NOT_CHANGED;
  }

  string group = "group";
  u32 gradientNum = fusionOps.size();
  string model_name_str = graph->GetName();
  const char *model_name = model_name_str.c_str();
  model_feature modelFeature{model_name, gradientNum, inputGradientSize.data(), inputGradientTime.data()};

  u32 segmentNum = 0;
  u32 segmentIndex[HCCL_MAX_SEGMENT_NUM] = {};

  // Call HCCL function: hcom_get_split_strategy
  GELOGI("FusionAllReducePass: invoking hcom_get_split_strategy");
  if (HCCL_SUCCESS !=
      hcom_get_split_strategy(group.c_str(), &modelFeature, HCCL_MAX_SEGMENT_NUM, &segmentNum, segmentIndex)) {
    GELOGE(FAILED, "FusionAllReducePass FAILED: the graph has %zu allreduce operator", fusionOps.size());
    return FAILED;
  }
  GELOGI("FusionAllReducePass: invoke hcom_get_split_strategy successfully");

  // Check whether segmentNum is legal or not.
  if (HCCL_MAX_SEGMENT_NUM < segmentNum || 1 > segmentNum || segmentNum > gradientNum) {
    GELOGE(FAILED,
           "FusionAllReducePass FAILED: illegal segmentNum=%u, "
           "HCCL_MAX_SEGMENT_NUM=%u, gradientNum=%u",
           segmentNum, HCCL_MAX_SEGMENT_NUM, gradientNum);
    return FAILED;
  }

  // Check whether segmentIndex is legal or not: the last segment must end on
  // the last collected node ...
  if (segmentIndex[segmentNum - 1] != gradientNum - 1) {
    GELOGE(FAILED,
           "FusionAllReducePass FAILED: illegal segmentIndex[0]=%u, "
           "segmentIndex[segmentNum-1]=%u, gradientNum=%u",
           segmentIndex[0], segmentIndex[(segmentNum)-1], gradientNum);
    return FAILED;
  }
  // ... and the segment end indices must be strictly increasing.
  for (uint32_t i = 0; i < segmentNum - 1; i++) {
    if (segmentIndex[i] >= segmentIndex[i + 1]) {
      GELOGE(FAILED,
             "FusionAllReducePass FAILED: illegal "
             "segmentIndex[%u]=%u, segmentIndex[%u]=%u",
             i, segmentIndex[i], i + 1, segmentIndex[i + 1]);
      return FAILED;
    }
  }

  // Check whether fusion is needed or not: one segment per node means every
  // node stays on its own. This is a benign outcome, so it is reported as a
  // warning, like the NOT_CHANGED case above, rather than as an error.
  if (segmentNum == gradientNum) {
    GELOGW("FusionAllReducePass NOT_CHANGED: segmentNum=%u, gradientNum=%u", segmentNum, gradientNum);
    return NOT_CHANGED;
  }

  // Peers of the nodes of the segment currently being fused. anchorPtrSet
  // records the raw anchor pointers already collected so that each peer is
  // recorded only once per segment.
  std::unordered_set<void *> anchorPtrSet;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataAnchor;
  std::vector<ge::OutDataAnchorPtr> fusionOpPeerOutDataToInControl;
  std::vector<ge::OutControlAnchorPtr> fusionOpPeerOutControlAnchor;
  std::vector<std::pair<int, ge::InDataAnchorPtr>> fusionOpPeerInDataAnchor;
  std::vector<std::pair<int, ge::InControlAnchorPtr>> fusionOpPeerInControlFromOutData;
  std::vector<ge::InControlAnchorPtr> fusionOpPeerInControlAnchor;
  ge::OutControlAnchorPtr previousNewAllreduceOutControlAnchor = nullptr;

  // Traverse the segments; [start, end] is the inclusive node range of a segment.
  uint32_t start = 0;
  uint32_t end = 0;
  for (uint32_t segmentIdx = 0; segmentIdx < segmentNum; segmentIdx++) {
    end = segmentIndex[segmentIdx];
    // A segment holding a single node needs no fusion; leave that node as is.
    if (end - start < 1) {
      GELOGI("FusionAllReducePass: segmentIndex[%u]=%u", segmentIdx, segmentIndex[segmentIdx]);
      start = end + 1;
      continue;
    }

    // The fused node starts as a clone of the first node of the segment.
    ge::OpDescPtr originDescPtr = fusionOps[start]->GetOpDesc();
    GE_CHECK_NOTNULL(originDescPtr);
    ge::OpDescPtr newAllreduceDesc = AttrUtils::CloneOpDesc(originDescPtr);
    GE_CHECK_NOTNULL(newAllreduceDesc);

    // Clear the per-segment buffers.
    anchorPtrSet.clear();
    fusionOpPeerOutDataAnchor.clear();
    fusionOpPeerOutDataToInControl.clear();
    fusionOpPeerOutControlAnchor.clear();
    fusionOpPeerInDataAnchor.clear();
    fusionOpPeerInControlFromOutData.clear();
    fusionOpPeerInControlAnchor.clear();

    // Detach the first node of the segment. Its descriptors are already part
    // of the cloned OpDesc, so no input/output desc is appended for it.
    int outDataAnchorIndex = 0;
    GE_CHK_STATUS_RET(GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[start]),
                      "Get peer outDataAnchor to inDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerInAnchorToOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                               fusionOps[start]),
                      "Get peer inDataAnchor and inControlAnchor to outDataAnchor failed");

    GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[start]),
                      "Get peer outDataAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor to inControlAnchor failed");
    GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[start]),
                      "Get peer outControlAnchor from inControlAnchor failed");
    GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[start]), "FusionAllReducePass FAILED: remove node %s\n.",
                      fusionOps[start]->GetName().c_str());

    // Detach the remaining nodes of the segment, appending their input and
    // output descriptors to the fused OpDesc as they are collected.
    for (uint32_t idx = start + 1; idx <= end; idx++) {
      GE_CHK_STATUS_RET(
          GetPeerOutDataToInData(anchorPtrSet, fusionOpPeerOutDataAnchor, fusionOps[idx], newAllreduceDesc),
          "Get peer outDataAnchor to inDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutDataToInControl(anchorPtrSet, fusionOpPeerOutDataToInControl, fusionOps[idx]),
                        "Get peer outDataAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(GetPeerOutControlToInControl(anchorPtrSet, fusionOpPeerOutControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor to inControlAnchor failed");
      GE_CHK_STATUS_RET(
          GetPeerAnchorFromOutData(anchorPtrSet, fusionOpPeerInDataAnchor, fusionOpPeerInControlFromOutData,
                                   fusionOps[idx], newAllreduceDesc, outDataAnchorIndex),
          "Get peerAnchor from outDataAnchor failed");
      GE_CHK_STATUS_RET(GetPeerInControlFromOutControl(anchorPtrSet, fusionOpPeerInControlAnchor, fusionOps[idx]),
                        "Get peer outControlAnchor from inControlAnchor failed");

      // Delete the node
      GE_CHK_STATUS_RET(graph->RemoveNode(fusionOps[idx]), "FusionAllReducePass FAILED: remove node %s\n.",
                        fusionOps[idx]->GetName().c_str());
    }

    NodePtr newAllReducePtr = graph->AddNode(newAllreduceDesc);
    GE_CHECK_NOTNULL(newAllReducePtr);

    // Link the inputDataAnchor
    for (uint32_t i = 0; i < fusionOpPeerOutDataAnchor.size(); i++) {
      GE_CHK_STATUS_RET(
          GraphUtils::AddEdge(fusionOpPeerOutDataAnchor[i], newAllReducePtr->GetInDataAnchor(static_cast<int>(i))),
          "FusionAllReducePass FAILED: add input data edge failed");
    }

    // Link the inputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerOutControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutControlAnchor[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add input control edge failed");
    }

    for (uint32_t i = 0; i < fusionOpPeerOutDataToInControl.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(fusionOpPeerOutDataToInControl[i], newAllReducePtr->GetInControlAnchor()),
                        "FusionAllReducePass FAILED: add edge from out data to incontrol "
                        "failed");
    }

    // Link the outputDataAnchor
    for (uint32_t i = 0; i < fusionOpPeerInDataAnchor.size(); i++) {
      auto peerInDataAnchor = fusionOpPeerInDataAnchor[i].second;
      GE_CHK_STATUS_RET(
          GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInDataAnchor[i].first), peerInDataAnchor),
          "FusionAllReducePass FAILED: add output data edge failed");
    }

    for (uint32_t i = 0; i < fusionOpPeerInControlFromOutData.size(); i++) {
      auto peerInControlAnchor = fusionOpPeerInControlFromOutData[i].second;
      GE_CHK_STATUS_RET(
          GraphUtils::AddEdge(newAllReducePtr->GetOutDataAnchor(fusionOpPeerInControlFromOutData[i].first),
                              peerInControlAnchor),
          "FusionAllReducePass FAILED: add edge from out data to in control "
          "failed");
    }

    // Link the outputControlAnchor
    for (uint32_t i = 0; i < fusionOpPeerInControlAnchor.size(); i++) {
      GE_CHK_STATUS_RET(GraphUtils::AddEdge(newAllReducePtr->GetOutControlAnchor(), fusionOpPeerInControlAnchor[i]),
                        "FusionAllReducePass FAILED: add output control edge failed");
    }

    // Chain this fused node after the previously created one with a control edge.
    if (segmentIdx > 0 && previousNewAllreduceOutControlAnchor != nullptr) {
      GE_CHK_STATUS_RET(
          GraphUtils::AddEdge(previousNewAllreduceOutControlAnchor, newAllReducePtr->GetInControlAnchor()),
          "FusionAllReducePass FAILED: add input previous control edge failed");
    }

    previousNewAllreduceOutControlAnchor = newAllReducePtr->GetOutControlAnchor();
    start = end + 1;
  }

  return SUCCESS;
}
/// Collects the out-data anchors that feed the data inputs of `srcNodePtr` and
/// removes the corresponding edges.
///
/// @param anchorSet             Raw pointers of anchors already collected; a
///                              peer found in this set is skipped (its edge is
///                              left in place), otherwise it is added.
/// @param peerOutDataAnchorVec  Receives each newly collected peer out-data anchor.
/// @param srcNodePtr            Node whose data inputs are inspected.
/// @return SUCCESS, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr) {
  for (auto dataInput : srcNodePtr->GetAllInDataAnchors()) {
    if (dataInput == nullptr) {
      continue;
    }
    OutDataAnchorPtr producer = dataInput->GetPeerOutAnchor();
    if (producer == nullptr) {
      continue;
    }
    // insert() reports through .second whether the anchor was new to the set.
    if (!anchorSet.insert(producer.get()).second) {
      continue;
    }
    peerOutDataAnchorVec.push_back(producer);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(producer, dataInput));
  }
  return SUCCESS;
}
/// Collects the consumers (in-data and in-control anchors) attached to the data
/// outputs of `srcNodePtr` and removes the corresponding edges.
///
/// Every collected consumer is paired with output index 0.
///
/// @param anchorSet                         Raw pointers of anchors already
///                                          collected; known consumers are skipped.
/// @param fusionOpPeerInDataAnchor          Receives (0, in-data anchor) pairs.
/// @param fusionOpPeerInControlFromOutData  Receives (0, in-control anchor) pairs.
/// @param srcNodePtr                        Node whose data outputs are inspected.
/// @return SUCCESS, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerInAnchorToOutData(
    std::unordered_set<void *> &anchorSet, std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor,
    std::vector<std::pair<int, ge::InControlAnchorPtr>> &fusionOpPeerInControlFromOutData, ge::NodePtr &srcNodePtr) {
  for (auto dataOutput : srcNodePtr->GetAllOutDataAnchors()) {
    if (dataOutput == nullptr) {
      continue;
    }

    // Data consumers of this output.
    for (auto dataConsumer : dataOutput->GetPeerInDataAnchors()) {
      if (dataConsumer == nullptr) {
        continue;
      }
      if (!anchorSet.insert(dataConsumer.get()).second) {
        continue;  // already collected
      }
      fusionOpPeerInDataAnchor.emplace_back(0, dataConsumer);
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataOutput, dataConsumer));
    }

    // Control consumers driven by this data output.
    for (auto controlConsumer : dataOutput->GetPeerInControlAnchors()) {
      if (controlConsumer == nullptr) {
        continue;
      }
      if (!anchorSet.insert(controlConsumer.get()).second) {
        continue;  // already collected
      }
      fusionOpPeerInControlFromOutData.emplace_back(0, controlConsumer);
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataOutput, controlConsumer));
    }
  }
  return SUCCESS;
}
/// Same collection as the three-argument overload, and additionally appends
/// the input descriptor of each newly collected input to `dstOpDescPtr`.
///
/// @param anchorSet             Raw pointers of anchors already collected; a
///                              peer found in this set is skipped.
/// @param peerOutDataAnchorVec  Receives each newly collected peer out-data anchor.
/// @param srcNodePtr            Node whose data inputs are inspected.
/// @param dstOpDescPtr          OpDesc that receives a copy of the input
///                              descriptor; a failed AddInputDesc is only
///                              logged as a warning.
/// @return SUCCESS, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet,
                                             vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec,
                                             ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr) {
  for (auto dataInput : srcNodePtr->GetAllInDataAnchors()) {
    if (dataInput == nullptr) {
      continue;
    }
    OutDataAnchorPtr producer = dataInput->GetPeerOutAnchor();
    if (producer == nullptr) {
      continue;
    }
    if (!anchorSet.insert(producer.get()).second) {
      continue;  // already collected
    }
    peerOutDataAnchorVec.push_back(producer);

    // Mirror this input's descriptor onto the destination OpDesc.
    const auto addStatus =
        dstOpDescPtr->AddInputDesc(dataInput->GetOwnerNode()->GetOpDesc()->GetInputDesc(dataInput->GetIdx()));
    if (addStatus != ge::GRAPH_SUCCESS) {
      GELOGW("GetPeerOutDataToInData: AddInputDesc failed");
    }
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(producer, dataInput));
  }
  return SUCCESS;
}
/// Collects the out-data anchors connected to the in-control anchor of
/// `srcNodePtr` and removes the corresponding edges.
///
/// @param anchorSet                  Raw pointers of anchors already collected;
///                                   known peers are skipped.
/// @param peerOutDataToInControlVec  Receives each newly collected out-data anchor.
/// @param srcNodePtr                 Node whose in-control anchor is inspected.
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         in-control anchor, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet,
                                                vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec,
                                                ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(inControlAnchor);

  for (auto dataProducer : inControlAnchor->GetPeerOutDataAnchors()) {
    if (dataProducer == nullptr) {
      continue;
    }
    if (!anchorSet.insert(dataProducer.get()).second) {
      continue;  // already collected
    }
    peerOutDataToInControlVec.push_back(dataProducer);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataProducer, inControlAnchor));
  }
  return SUCCESS;
}
/// Collects the out-control anchors connected to the in-control anchor of
/// `srcNodePtr` and removes the corresponding edges.
///
/// @param anchorSet                     Raw pointers of anchors already
///                                      collected; known peers are skipped.
/// @param peerOutControlToInControlVec  Receives each newly collected
///                                      out-control anchor.
/// @param srcNodePtr                    Node whose in-control anchor is inspected.
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         in-control anchor, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet,
                                                   vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec,
                                                   ge::NodePtr &srcNodePtr) {
  InControlAnchorPtr inControlAnchor = srcNodePtr->GetInControlAnchor();
  GE_CHECK_NOTNULL(inControlAnchor);

  for (auto controlProducer : inControlAnchor->GetPeerOutControlAnchors()) {
    if (controlProducer == nullptr) {
      continue;
    }
    if (!anchorSet.insert(controlProducer.get()).second) {
      continue;  // already collected
    }
    peerOutControlToInControlVec.push_back(controlProducer);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(controlProducer, inControlAnchor));
  }
  return SUCCESS;
}
/// Collects the consumers attached to the data outputs of `srcNodePtr`,
/// removes the corresponding edges, and appends an output descriptor to
/// `dstOpDescPtr` for every output that has at least one consumer.
///
/// `index` is incremented once per output that has consumers, before that
/// output's consumers are recorded, so each recorded pair carries the
/// post-increment value.
///
/// @param anchorSet                    Raw pointers of anchors already
///                                     collected; known consumers are skipped.
/// @param peerInDataFromOutDataVec     Receives (index, in-data anchor) pairs.
/// @param peerInControlFromOutDataVec  Receives (index, in-control anchor) pairs.
/// @param srcNodePtr                   Node whose data outputs are inspected.
/// @param dstOpDescPtr                 OpDesc that receives the output
///                                     descriptors; a failed AddOutputDesc is
///                                     only logged as a warning.
/// @param index                        In/out running output index.
/// @return SUCCESS, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerAnchorFromOutData(
    std::unordered_set<void *> &anchorSet, vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec,
    vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, ge::NodePtr &srcNodePtr,
    ge::OpDescPtr &dstOpDescPtr, int &index) {
  for (auto dataOutput : srcNodePtr->GetAllOutDataAnchors()) {
    if (dataOutput == nullptr) {
      continue;
    }

    // Only outputs that are actually consumed get a slot on the destination.
    const bool hasConsumers =
        dataOutput->GetPeerInDataAnchors().size() > 0 || dataOutput->GetPeerInControlAnchors().size() > 0;
    if (hasConsumers) {
      const auto addStatus =
          dstOpDescPtr->AddOutputDesc(dataOutput->GetOwnerNode()->GetOpDesc()->GetOutputDesc(dataOutput->GetIdx()));
      if (addStatus != ge::GRAPH_SUCCESS) {
        GELOGW("GetPeerAnchorFromOutData: AddOutputDesc failed");
      }
      index++;
    }

    // Data consumers of this output.
    for (auto dataConsumer : dataOutput->GetPeerInDataAnchors()) {
      if (dataConsumer == nullptr) {
        continue;
      }
      if (!anchorSet.insert(dataConsumer.get()).second) {
        continue;  // already collected
      }
      peerInDataFromOutDataVec.emplace_back(index, dataConsumer);
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataOutput, dataConsumer));
    }

    // Control consumers driven by this data output.
    for (auto controlConsumer : dataOutput->GetPeerInControlAnchors()) {
      if (controlConsumer == nullptr) {
        continue;
      }
      if (!anchorSet.insert(controlConsumer.get()).second) {
        continue;  // already collected
      }
      peerInControlFromOutDataVec.emplace_back(index, controlConsumer);
      GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(dataOutput, controlConsumer));
    }
  }
  return SUCCESS;
}
/// Collects the in-control anchors driven by the out-control anchor of
/// `srcNodePtr` and removes the corresponding edges.
///
/// @param anchorSet                       Raw pointers of anchors already
///                                        collected; known peers are skipped.
/// @param peerInControlFromOutControlVec  Receives each newly collected
///                                        in-control anchor.
/// @param srcNodePtr                      Node whose out-control anchor is inspected.
/// @return SUCCESS, the GE_CHECK_NOTNULL failure status when the node has no
///         out-control anchor, or the status of the first failing RemoveEdge.
Status AllReducePass::GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet,
                                                     vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec,
                                                     ge::NodePtr &srcNodePtr) {
  OutControlAnchorPtr outControlAnchor = srcNodePtr->GetOutControlAnchor();
  GE_CHECK_NOTNULL(outControlAnchor);

  for (auto controlConsumer : outControlAnchor->GetPeerInControlAnchors()) {
    if (controlConsumer == nullptr) {
      continue;
    }
    if (!anchorSet.insert(controlConsumer.get()).second) {
      continue;  // already collected
    }
    peerInControlFromOutControlVec.push_back(controlConsumer);
    GE_CHK_STATUS_RET(GraphUtils::RemoveEdge(outControlAnchor, controlConsumer));
  }
  return SUCCESS;
}
} // namespace ge |
@@ -1,56 +0,0 @@ | |||
/** | |||
* Copyright 2020 Huawei Technologies Co., Ltd | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||
#define GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ | |||
#include <unordered_set> | |||
#include <utility> | |||
#include <vector> | |||
#include "inc/graph_pass.h" | |||
namespace ge { | |||
// | |||
class AllReducePass : public GraphPass { | |||
public: | |||
Status Run(ge::ComputeGraphPtr graph) override; | |||
private: | |||
Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||
vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, ge::NodePtr &srcNodePtr, | |||
ge::OpDescPtr &dstOpDescPtr); | |||
Status GetPeerOutDataToInControl(std::unordered_set<void *> &anchorSet, | |||
vector<ge::OutDataAnchorPtr> &peerOutDataToInControlVec, ge::NodePtr &srcNodePtr); | |||
Status GetPeerOutControlToInControl(std::unordered_set<void *> &anchorSet, | |||
vector<ge::OutControlAnchorPtr> &peerOutControlToInControlVec, | |||
ge::NodePtr &srcNodePtr); | |||
Status GetPeerAnchorFromOutData(std::unordered_set<void *> &anchorSet, | |||
vector<std::pair<int, ge::InDataAnchorPtr>> &peerInDataFromOutDataVec, | |||
vector<std::pair<int, ge::InControlAnchorPtr>> &peerInControlFromOutDataVec, | |||
ge::NodePtr &srcNodePtr, ge::OpDescPtr &dstOpDescPtr, int &index); | |||
Status GetPeerInControlFromOutControl(std::unordered_set<void *> &anchorSet, | |||
vector<ge::InControlAnchorPtr> &peerInControlFromOutControlVec, | |||
ge::NodePtr &srcNodePtr); | |||
Status GetPeerOutDataToInData(std::unordered_set<void *> &anchorSet, | |||
std::vector<ge::OutDataAnchorPtr> &peerOutDataAnchorVec, | |||
ge::NodePtr &srcNodePtr); | |||
Status GetPeerInAnchorToOutData(std::unordered_set<void *> &anchorSet, | |||
std::vector<std::pair<int, ge::InDataAnchorPtr>> &fusionOpPeerInDataAnchor, | |||
std::vector<std::pair<int, ge::InControlAnchorPtr>>&fusionOpPeerInControlFromOutData, | |||
ge::NodePtr &srcNodePtr); | |||
}; | |||
} // namespace ge | |||
#endif // GE_GRAPH_OPTIMIZE_OPTIMIZER_ALLREDUCE_FUSION_PASS_H_ |
@@ -50,7 +50,7 @@ Status EnginePlacer::Check() const { | |||
Status EnginePlacer::Run() { | |||
std::lock_guard<std::mutex> lock(check_support_cost_mutex); | |||
GELOGI("Engine placer starts."); | |||
GELOGD("Engine placer starts."); | |||
if (Check() != SUCCESS) { | |||
return FAILED; | |||
} | |||
@@ -101,7 +101,7 @@ Status EnginePlacer::Run() { | |||
for (auto &it : ge::GELib::GetInstance()->DNNEngineManagerObj().GetCheckSupportCost()) { | |||
GEEVENT("The time cost of %s::CheckSupported is [%lu] micro second.", it.first.c_str(), it.second); | |||
} | |||
GELOGI("Engine placer ends."); | |||
GELOGD("Engine placer ends."); | |||
return is_check_support_success ? SUCCESS : FAILED; | |||
} | |||
@@ -223,7 +223,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||
GELOGE(GE_GRAPH_UNSUPPORTED, "Cannot call merging in partition mode"); | |||
return FAILED; | |||
} | |||
GELOGI("Graph merge starts."); | |||
GELOGD("Graph merge starts."); | |||
// check input param | |||
for (const auto &it : sub_graph_list) { | |||
if (it == nullptr) { | |||
@@ -261,7 +261,7 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co | |||
return FAILED; | |||
} | |||
GE_TIMESTAMP_END(MergeSubGraphEnginePlacerRun, "GraphPartitioner::MergeGraphEnginePlacerRun"); | |||
GELOGI("Graph merge ends."); | |||
GELOGD("Graph merge ends."); | |||
return SUCCESS; | |||
} | |||
@@ -581,7 +581,7 @@ Status ge::GraphPartitioner::Initialize(ge::ComputeGraphPtr compute_graph) { | |||
new_cluster->engine_name_.c_str(), new_cluster->index_, new_cluster->stream_label_.c_str()); | |||
temp_index++; | |||
} | |||
GELOGI("Initialize ends."); | |||
GELOGD("Initialize ends."); | |||
return SUCCESS; | |||
} | |||
@@ -754,11 +754,11 @@ void ge::GraphPartitioner::MarkClusters() { | |||
} | |||
} | |||
} | |||
GELOGI("MarkClusters ends."); | |||
GELOGD("MarkClusters ends."); | |||
} | |||
Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) { | |||
GELOGI("SplitSubGraphs starts."); | |||
GELOGD("SplitSubGraphs starts."); | |||
if (compute_graph == nullptr) { | |||
GELOGE(FAILED, "parameter ptr is null."); | |||
return FAILED; | |||
@@ -823,7 +823,7 @@ Status ge::GraphPartitioner::SplitSubGraphs(ge::ComputeGraphPtr compute_graph) { | |||
} | |||
} | |||
} | |||
GELOGI("SplitSubGraphs ends."); | |||
GELOGD("SplitSubGraphs ends."); | |||
return SUCCESS; | |||
} | |||
@@ -46,7 +46,7 @@ Status AtomicAddrCleanPass::Run(ComputeGraphPtr graph) { | |||
} | |||
} | |||
if (atomic_node_vec.empty()) { | |||
GELOGI("There is no atomic node. Ignore atomicAddrClean pass."); | |||
GELOGD("There is no atomic node. Ignore atomicAddrClean pass."); | |||
return SUCCESS; | |||
} | |||
@@ -332,7 +332,7 @@ Status CondRemovePass::GetCondInfo(const NodePtr &node, ComputeGraphPtr &graph, | |||
return FAILED; | |||
} | |||
} else { | |||
GELOGI("no need cond_remove_pass for node %s.", node->GetName().c_str()); | |||
GELOGD("no need cond_remove_pass for node %s.", node->GetName().c_str()); | |||
return NOT_CHANGED; | |||
} | |||
@@ -16,6 +16,7 @@ | |||
#include "graph/passes/mark_agnostic_pass.h" | |||
#include "graph/utils/node_utils.h" | |||
#include "graph/utils/tensor_utils.h" | |||
namespace ge { | |||
Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { | |||
@@ -47,6 +48,16 @@ Status MarkAgnosticPass::Run(ComputeGraphPtr graph) { | |||
} | |||
if (node_type == MERGE) { | |||
GELOGD("Mark format agnostic and continuous for merge node %s", node->GetName().c_str()); | |||
auto in_nodes = node->GetInAllNodes(); | |||
vector<NodePtr> input_nodes(in_nodes.begin(), in_nodes.end()); | |||
/// Enter-----------+ | |||
/// +-> Merge | |||
/// NextIteration---+ | |||
if (input_nodes.size() == 2) { | |||
if (input_nodes[0]->GetType() == ENTER && input_nodes[1]->GetType() == NEXTITERATION) { | |||
continue; | |||
} | |||
} | |||
const OpDescPtr op_desc = node->GetOpDesc(); | |||
const GeTensorDescPtr op_tensor = op_desc->MutableOutputDesc(0); | |||
if (op_tensor == nullptr) { | |||
@@ -278,7 +278,7 @@ Status MemcpyAddrAsyncPass::InsertMemcpyAddrAsyncNode(const OutDataAnchorPtr &ou | |||
} | |||
Status MemcpyAddrAsyncPass::InsertMemAddrAsyncNodeBeforeNetoutput(const ComputeGraphPtr &graph, const NodePtr &node) { | |||
GELOGI("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | |||
GELOGD("Start AddMemcpyAddrAsyncNode for %s.", node->GetName().c_str()); | |||
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | |||
auto in_node = NodeUtils::GetInDataNodeByIndex(*node, in_data_anchor->GetIdx()); | |||
GE_CHECK_NOTNULL(in_node); | |||
@@ -33,7 +33,7 @@ Status MultiBatchPass::Run(ComputeGraphPtr graph) { | |||
OutDataAnchorPtr pred_value = nullptr; | |||
Status ret = FindPredValue(graph, pred_value); | |||
if (ret == NOT_CHANGED) { | |||
GELOGI("SwitchN node not exist, graph not changed."); | |||
GELOGD("SwitchN node not exist, graph not changed."); | |||
return SUCCESS; | |||
} | |||
if (ret != SUCCESS) { | |||
@@ -158,7 +158,7 @@ Status MultiBatchPass::FindPredValue(const ComputeGraphPtr &graph, OutDataAnchor | |||
} | |||
if (switch_n_nodes_.empty()) { | |||
GELOGI("SwitchN node not exist."); | |||
GELOGD("SwitchN node not exist."); | |||
return NOT_CHANGED; | |||
} | |||
@@ -128,7 +128,7 @@ Status SetInputOutputOffsetPass::SetInputOffsetForHcom(const ge::NodePtr &node, | |||
} | |||
Status SetInputOutputOffsetPass::SetInputOffset(const NodePtr &node, const vector<int> &connect_input) { | |||
GELOGI("Start to SetInputOffset for %s.", node->GetName().c_str()); | |||
GELOGD("Start to SetInputOffset for %s.", node->GetName().c_str()); | |||
std::vector<int64_t> memory_type; | |||
auto op_desc = node->GetOpDesc(); | |||
(void)ge::AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_MEM_TYPE_LIST, memory_type); | |||
@@ -241,7 +241,7 @@ Status SetInputOutputOffsetPass::SetOutputOffsetForHcom(const NodePtr &node, con | |||
} | |||
Status SetInputOutputOffsetPass::SetOutputOffset(const NodePtr &node, const vector<int> &connect_output) { | |||
GELOGI("Start SetOutputOffset of %s.", node->GetName().c_str()); | |||
GELOGD("Start SetOutputOffset of %s.", node->GetName().c_str()); | |||
bool attr_no_task = false; | |||
bool get_attr_no_task = ge::AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_NOTASK, attr_no_task); | |||
if (get_attr_no_task && attr_no_task) { | |||
@@ -117,6 +117,7 @@ | |||
#include "graph/passes/variable_op_pass.h" | |||
#include "graph/passes/variable_prepare_op_pass.h" | |||
#include "graph/passes/variable_ref_delete_op_pass.h" | |||
#include "graph/passes/mark_agnostic_pass.h" | |||
namespace ge { | |||
@@ -1700,6 +1701,7 @@ Status GraphPrepare::PrepareOptimize() { | |||
try { | |||
(void)original_graph_passes.AddPass("PrepareOptimize::ShapeOperateOpRemovePass", new ShapeOperateOpRemovePass); | |||
(void)original_graph_passes.AddPass("PrepareOptimize::ReplaceTransShapePass", new ReplaceTransShapePass); | |||
(void)original_graph_passes.AddPass("PrepareOptimize::MarkAgnosticPass" , new MarkAgnosticPass); | |||
} catch (std::bad_alloc &e) { | |||
GELOGE(INTERNAL_ERROR, "Add pass failed, bad memory allocation occurs."); | |||
return INTERNAL_ERROR; | |||
@@ -1571,6 +1571,10 @@ void GetDynamicShapeByMerge(const ComputeGraphPtr &graph, const NodePtr &node, | |||
// Connect NetOutput directly | |||
void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, | |||
const set<size_t> &dynamic_output_index, vector<string> &dynamic_output_dims) { | |||
if (!GetLocalOmgContext().dynamic_node_type.empty()) { | |||
GELOGD("No need to get directly shape info of %s when train.", node->GetName().c_str()); | |||
return; | |||
} | |||
GELOGD("Try get directly shape info, Graph: %s, Node: %s", graph->GetName().c_str(), node->GetName().c_str()); | |||
const auto &netoutput_desc = node->GetOpDesc(); | |||
const auto &inputnode_to_netoutput = node->GetInAllNodes(); | |||
@@ -1578,9 +1582,6 @@ void GetDirectOutputShape(const ComputeGraphPtr &graph, const NodePtr &node, | |||
if (dynamic_output_index.count(i) > 0) { | |||
continue; | |||
} | |||
if (inputnode_to_netoutput.at(i)->GetType() == GETDYNAMICDIMS) { | |||
continue; | |||
} | |||
auto tensor_desc = netoutput_desc->GetInputDesc(i); | |||
auto shape = tensor_desc.GetShape().ToString(); | |||
@@ -84,8 +84,10 @@ Status DistinguishGetNextAndData(ComputeGraphPtr &graph, vector<NodePtr> &data_n | |||
if (op_desc->GetType() == DATA && op_desc->GetName() != kShapeDataName) { | |||
if (op_desc->GetName().find(kSubstrOfGetNextNosinkName) == string::npos) { | |||
data_nodes.emplace_back(input_node); | |||
GELOGD("Name of data node is %s.", op_desc->GetName().c_str()); | |||
} else { | |||
getnext_nosink_nodes.emplace_back(input_node); | |||
GELOGD("Name of getnext nosink is %s.", op_desc->GetName().c_str()); | |||
} | |||
} | |||
if (IsGetNextType(input_node)) { | |||
@@ -111,6 +113,8 @@ Status CheckSequenceOfData(ComputeGraphPtr &graph, const vector<NodePtr> &data_n | |||
GE_CHECK_NOTNULL(data_node->GetOpDesc()); | |||
auto output_shape = data_node->GetOpDesc()->GetOutputDesc(0).GetShape().GetDims(); | |||
auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | |||
GELOGD("The %zu data node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), | |||
formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); | |||
if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | |||
GELOGI("No need to check sequence for constant."); | |||
continue; | |||
@@ -151,6 +155,8 @@ Status CheckSequenceOfGetnext(ComputeGraphPtr &graph, const vector<NodePtr> &get | |||
for (size_t i = 0; i < data_count; ++i) { | |||
auto output_shape = data_node->GetOpDesc()->GetOutputDesc(i).GetShape().GetDims(); | |||
auto dynamic_dims = GetLocalOmgContext().user_input_dims.at(i).second; | |||
GELOGD("The %zu getnext node is %s, node shape is %s, dynamic dim is %s.", i, data_node->GetName().c_str(), | |||
formats::JoinToString(output_shape).c_str(), formats::JoinToString(dynamic_dims).c_str()); | |||
if (output_shape.empty() && dynamic_dims.size() == 1 && dynamic_dims.at(0) == 0) { | |||
GELOGI("No need to check sequence for constant."); | |||
continue; | |||
@@ -80,6 +80,26 @@ LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||
include ${BUILD_HOST_SHARED_LIBRARY} | |||
#compiler for device ops kernel builder | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := libhost_cpu_opskernel_builder | |||
LOCAL_CFLAGS += -Werror | |||
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private | |||
LOCAL_LDFLAGS := | |||
LOCAL_STATIC_LIBRARIES := | |||
LOCAL_SHARED_LIBRARIES := libascend_protobuf \ | |||
libc_sec \ | |||
libslog \ | |||
libgraph \ | |||
libregister \ | |||
LOCAL_SRC_FILES := ops_kernel_store/host_cpu_ops_kernel_builder.cc | |||
LOCAL_C_INCLUDES := $(local_lib_inc_path) | |||
include ${BUILD_SHARED_LIBRARY} | |||
#compiler for host static lib | |||
include $(CLEAR_VARS) | |||
LOCAL_MODULE := libhost_cpu_opskernel_builder | |||
@@ -57,9 +57,9 @@ struct GraphExecutionContext { | |||
do { \ | |||
if ((context != nullptr) && (context)->profiler != nullptr) { \ | |||
if (node_name != nullptr) { \ | |||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GetTid(), node_name, category, ##__VA_ARGS__);\ | |||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] [%s] " fmt, GeLog::GetTid(), node_name, category, ##__VA_ARGS__);\ | |||
} else { \ | |||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GetTid(), category, ##__VA_ARGS__); \ | |||
context->profiler->RecordEvent(evt_type, "tid:%lu [%s] " fmt, GeLog::GetTid(), category, ##__VA_ARGS__); \ | |||
}\ | |||
} \ | |||
} while (0) | |||
@@ -57,6 +57,9 @@ Status AicpuExtInfoHandler::Parse(const std::string &ext_info) { | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_OUTPUT_SHAPE: | |||
GE_CHK_STATUS_RET(ParseExtOutputShape(aicpu_ext_info), "Parse ext output shape failed."); | |||
break; | |||
case aicpu::FWKAdapter::FWK_ADPT_EXT_SESSION_INFO: | |||
GE_CHK_STATUS_RET(ParseExtSessionInfo(aicpu_ext_info), "Parse ext session info failed."); | |||
break; | |||
default: | |||
GELOGD("Node[%s] ignore infoType=%d, infoLen=%u.", | |||
node_name_.c_str(), aicpu_ext_info->infoType, aicpu_ext_info->infoLen); | |||
@@ -123,6 +126,39 @@ Status AicpuExtInfoHandler::ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info) { | |||
return SUCCESS; | |||
} | |||
/// Parses the session-info entry of the AICPU extended info.
///
/// The entry is accepted only when its payload length equals
/// sizeof(AicpuSessionInfo). On success `session_info_` is pointed at the
/// payload (`infoMsg`) of `aicpu_ext_info`; no copy is made, so later updates
/// through `session_info_` write into that buffer.
///
/// @param aicpu_ext_info  ext-info entry whose payload is the session info
/// @return SUCCESS on success; PARAM_INVALID is passed to GE_CHK_BOOL_RET_STATUS
///         for the length-mismatch case
Status AicpuExtInfoHandler::ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info) {
  // Report the expected size with the same alias the check uses, so the
  // condition and its message cannot drift apart.
  GE_CHK_BOOL_RET_STATUS(aicpu_ext_info->infoLen == sizeof(AicpuSessionInfo), PARAM_INVALID,
                         "Node[%s] parse ext session info failed as infoLen must be %zu but %u.",
                         node_name_.c_str(), sizeof(AicpuSessionInfo), aicpu_ext_info->infoLen);

  session_info_ = reinterpret_cast<AicpuSessionInfo *>(aicpu_ext_info->infoMsg);
  GELOGI("Node[%s] parse session info success infoLen=%u.", node_name_.c_str(), aicpu_ext_info->infoLen);
  return SUCCESS;
}
/// Writes the session id, kernel id and session flag into the parsed session
/// info. When no session info has been parsed (`session_info_` is null) nothing
/// is written and only a debug message is logged.
///
/// @param session_id  value stored in `sessionId`
/// @param kernel_id   value stored in `kernelId`
/// @param sess_flag   value stored in `sessFlag`
/// @return always SUCCESS
Status AicpuExtInfoHandler::UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag) {
  if (session_info_ != nullptr) {
    auto &info = *session_info_;
    info.sessionId = session_id;
    info.kernelId = kernel_id;
    info.sessFlag = sess_flag;
    return SUCCESS;
  }

  // Absence of session info is not an error: there is simply nothing to update.
  GELOGD("There is no session info in ext_info, no need update.");
  return SUCCESS;
}
/// Stores `session_id` in the parsed session info and sets its session flag to
/// true. When no session info has been parsed (`session_info_` is null) nothing
/// is written and only a debug message is logged.
///
/// @param session_id  value stored in `sessionId`
/// @return always SUCCESS
Status AicpuExtInfoHandler::UpdateSessionInfoSessionId(uint64_t session_id) {
  if (session_info_ != nullptr) {
    auto &info = *session_info_;
    info.sessionId = session_id;
    info.sessFlag = true;
    return SUCCESS;
  }

  // Absence of session info is not an error: there is simply nothing to update.
  GELOGD("There is no session info in ext_info, no need update.");
  return SUCCESS;
}
Status AicpuExtInfoHandler::UpdateInputShapeAndType(uint32_t input_index, const GeTensorDesc &input_desc) { | |||
GE_CHECK_LE(input_index, input_num_); | |||
const auto &shape = input_desc.GetShape(); | |||
@@ -19,6 +19,7 @@ | |||
#include "external/ge/ge_api_error_codes.h" | |||
#include "cce/fwk_adpt_struct.h" | |||
#include "cce/aicpu_engine_struct.h" | |||
#include "graph/op_desc.h" | |||
#include "graph/ge_tensor.h" | |||
@@ -26,6 +27,7 @@ namespace ge { | |||
namespace hybrid { | |||
using AicpuShapeAndType = aicpu::FWKAdapter::ShapeAndType; | |||
using AicpuExtInfo = aicpu::FWKAdapter::ExtInfo; | |||
using AicpuSessionInfo = SessionInfo; | |||
class AicpuExtInfoHandler { | |||
public: | |||
@@ -51,6 +53,10 @@ class AicpuExtInfoHandler { | |||
Status UpdateOutputShapeAndType(uint32_t output_index, const GeTensorDesc &output_desc); | |||
Status UpdateSessionInfo(uint64_t session_id, uint64_t kernel_id, bool sess_flag); | |||
Status UpdateSessionInfoSessionId(uint64_t session_id); | |||
Status GetOutputShapeAndType(uint32_t output_index, GeShape &shape, DataType &data_type); | |||
private: | |||
@@ -58,6 +64,7 @@ class AicpuExtInfoHandler { | |||
Status ParseExtShapeType(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtInputShape(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtOutputShape(AicpuExtInfo *aicpu_ext_info); | |||
Status ParseExtSessionInfo(AicpuExtInfo *aicpu_ext_info); | |||
static Status UpdateShapeAndType(const GeShape &shape, | |||
DataType data_type, | |||
@@ -72,6 +79,7 @@ class AicpuExtInfoHandler { | |||
const uint32_t input_num_; | |||
const uint32_t output_num_; | |||
UnknowShapeOpType unknown_type_; | |||
AicpuSessionInfo *session_info_ = nullptr; | |||
std::unique_ptr<uint8_t[]> ext_info_; | |||
size_t ext_info_len_ = 0; | |||
@@ -40,29 +40,36 @@ Status AicpuNodeTaskBase::AllocTensorBuffer(size_t size, std::unique_ptr<TensorB | |||
return SUCCESS; | |||
} | |||
Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info) { | |||
if (node_item_->is_dynamic) { | |||
// dynamic node must have ext info | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), | |||
"Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", | |||
node_name_.c_str(), kernel_ext_info.size()); | |||
} | |||
// if no ext info no need copy to device. | |||
Status AicpuNodeTaskBase::InitExtInfo(const std::string &kernel_ext_info, int64_t session_id) { | |||
if (kernel_ext_info.empty()) { | |||
GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", | |||
node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); | |||
return SUCCESS; | |||
if (node_item_->is_dynamic) { | |||
// dynamic node must have ext info | |||
GELOGE(PARAM_INVALID, "Node[%s] parse ext info failed as ext info is empty.", node_name_.c_str()); | |||
return PARAM_INVALID; | |||
} else { | |||
// if no ext info no need copy to device. | |||
GELOGI("Node[%s] kernel_ext_info is empty, no need copy to device, is_dynamic=%s.", | |||
node_name_.c_str(), node_item_->is_dynamic ? "true" : "false"); | |||
return SUCCESS; | |||
} | |||
} | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_.Parse(kernel_ext_info), | |||
"Node[%s] parse kernel ext info failed, kernel_ext_info_size=%zu.", | |||
node_name_.c_str(), kernel_ext_info.size()); | |||
GELOGD("To update aicpu_task ext_info session_info session_id to %lu", session_id); | |||
GE_CHK_STATUS_RET(aicpu_ext_handle_.UpdateSessionInfoSessionId(session_id), | |||
"UpdateSessionInfoSessionId failed."); | |||
// copy task args buf | |||
GE_CHK_STATUS_RET(AllocTensorBuffer(kernel_ext_info.size(), ext_info_addr_dev_), | |||
GE_CHK_STATUS_RET(AllocTensorBuffer(aicpu_ext_handle_.GetExtInfoLen(), ext_info_addr_dev_), | |||
"Node[%s] alloc kernel_ext_info buf failed, size=%zu", | |||
node_name_.c_str(), kernel_ext_info.size()); | |||
node_name_.c_str(), aicpu_ext_handle_.GetExtInfoLen()); | |||
// copy default ext info to device | |||
GE_CHK_RT_RET(rtMemcpy(ext_info_addr_dev_->GetData(), ext_info_addr_dev_->GetSize(), | |||
kernel_ext_info.data(), kernel_ext_info.size(), RT_MEMCPY_HOST_TO_DEVICE)); | |||
aicpu_ext_handle_.GetExtInfo(), aicpu_ext_handle_.GetExtInfoLen(), | |||
RT_MEMCPY_HOST_TO_DEVICE)); | |||
return SUCCESS; | |||
} | |||
@@ -290,7 +297,8 @@ Status AicpuTfNodeTask::Init(const HybridModel &model) { | |||
node_name_.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | |||
// init ext info | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name_.c_str()); | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name_.c_str()); | |||
GE_CHK_STATUS_RET(InitForDependComputeTask(), "Node[%s] init for depend compute task failed.", node_name_.c_str()); | |||
// build fwk_op_kernel. | |||
@@ -679,7 +687,8 @@ Status AicpuNodeTask::Init(const HybridModel &model) { | |||
"Node[%s] task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | |||
node_name.c_str(), kernel_ext_info.size(), kernel_ext_info_size); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info), "Node[%s] init ext info failed.", node_name.c_str()); | |||
uint64_t ext_session_id = model.GetSessionId(); | |||
GE_CHK_STATUS_RET(InitExtInfo(kernel_ext_info, ext_session_id), "Node[%s] init ext info failed.", node_name.c_str()); | |||
if (ext_info_addr_dev_ == nullptr) { | |||
aicpu_param_head->extInfoLength = 0; | |||
@@ -43,7 +43,7 @@ class AicpuNodeTaskBase : public NodeTask { | |||
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override; | |||
protected: | |||
virtual Status InitExtInfo(const std::string &kernel_ext_info); | |||
virtual Status InitExtInfo(const std::string &kernel_ext_info, int64_t session_id); | |||
virtual Status UpdateExtInfo(); | |||
@@ -110,7 +110,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||
Status initSystemStatus = SystemInitialize(options); | |||
GE_TIMESTAMP_END(SystemInitialize, "InnerInitialize::SystemInitialize"); | |||
if (initSystemStatus != SUCCESS) { | |||
GELOGE(initSystemStatus); | |||
GELOGE(initSystemStatus, "GE system initial failed."); | |||
RollbackInit(); | |||
return initSystemStatus; | |||
} | |||
@@ -120,7 +120,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||
Status initEmStatus = engineManager_.Initialize(options); | |||
GE_TIMESTAMP_END(EngineInitialize, "InnerInitialize::EngineInitialize"); | |||
if (initEmStatus != SUCCESS) { | |||
GELOGE(initEmStatus); | |||
GELOGE(initEmStatus, "GE engine manager initial failed."); | |||
RollbackInit(); | |||
return initEmStatus; | |||
} | |||
@@ -130,7 +130,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||
Status initOpsStatus = opsManager_.Initialize(options); | |||
GE_TIMESTAMP_END(OpsManagerInitialize, "InnerInitialize::OpsManagerInitialize"); | |||
if (initOpsStatus != SUCCESS) { | |||
GELOGE(initOpsStatus); | |||
GELOGE(initOpsStatus, "GE ops manager initial failed."); | |||
RollbackInit(); | |||
return initOpsStatus; | |||
} | |||
@@ -140,7 +140,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||
Status initOpsBuilderStatus = OpsKernelBuilderManager::Instance().Initialize(options); | |||
GE_TIMESTAMP_END(OpsKernelBuilderManagerInitialize, "InnerInitialize::OpsKernelBuilderManager"); | |||
if (initOpsBuilderStatus != SUCCESS) { | |||
GELOGE(initOpsBuilderStatus); | |||
GELOGE(initOpsBuilderStatus, "GE ops builder manager initial failed."); | |||
RollbackInit(); | |||
return initOpsBuilderStatus; | |||
} | |||
@@ -150,7 +150,7 @@ Status GELib::InnerInitialize(const map<string, string> &options) { | |||
Status initSmStatus = sessionManager_.Initialize(options); | |||
GE_TIMESTAMP_END(SessionManagerInitialize, "InnerInitialize::SessionManagerInitialize"); | |||
if (initSmStatus != SUCCESS) { | |||
GELOGE(initSmStatus); | |||
GELOGE(initSmStatus, "GE session manager initial failed."); | |||
RollbackInit(); | |||
return initSmStatus; | |||
} | |||
@@ -504,7 +504,7 @@ void PrintOptionMap(std::map<std::string, std::string> &options, std::string tip | |||
for (auto iter = options.begin(); iter != options.end(); iter++) { | |||
std::string key = iter->first; | |||
std::string option_name = iter->second; | |||
GELOGI("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||
GELOGD("%s set successfully, option_key=%s, option_value=%s", tips.c_str(), key.c_str(), option_name.c_str()); | |||
} | |||
} | |||
@@ -340,7 +340,7 @@ void Impl::SetRtSocVersion() { | |||
if (rt_ret != RT_ERROR_NONE) { | |||
GELOGW("Set soc version %s failed. ret:0x%X", soc_version, rt_ret); | |||
} | |||
GELOGI("Set soc version %s success.", soc_version); | |||
GELOGD("Set soc version %s success.", soc_version); | |||
} | |||
} | |||
@@ -359,25 +359,25 @@ graphStatus Impl::CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTe | |||
GE_CHECK_NOTNULL(op); | |||
if (op->GetType() == DATA) { | |||
(void)AttrUtils::SetInt(op, ATTR_NAME_INDEX, index++); | |||
GELOGI("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); | |||
GELOGD("Data op inputDesc size: %zu", op->GetAllInputsDesc().size()); | |||
ge::GeTensorDesc tensor = op->GetInputDesc(0); | |||
string data_op_name = op->GetName(); | |||
GELOGI("Data op name: %s", data_op_name.c_str()); | |||
GELOGD("Data op name: %s", data_op_name.c_str()); | |||
ge::GeShape data_shape; | |||
auto iter = omg_context_.input_dims.find(data_op_name); | |||
if (iter != omg_context_.input_dims.end()) { | |||
data_shape = ge::GeShape(iter->second); | |||
GELOGI("Data op get shape from Context."); | |||
GELOGD("Data op get shape from Context."); | |||
} else { | |||
data_shape = tensor.GetShape(); | |||
GELOGI("Data op get shape from InputDesc in ge ir graph."); | |||
GELOGD("Data op get shape from InputDesc in ge ir graph."); | |||
} | |||
// If user point input format, do work for all data ops; else do according to tensor_desc | |||
auto data_format = omg_context_.format != domi::DOMI_TENSOR_ND ? | |||
ge::TypeUtils::DomiFormatToFormat(omg_context_.format) : tensor.GetFormat(); | |||
ge::DataType data_type = tensor.GetDataType(); | |||
string data_type_str = ge::TypeUtils::DataTypeToSerialString(data_type); | |||
GELOGI("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | |||
GELOGD("Data op get data type:%s from InputDesc in ge ir graph.", data_type_str.c_str()); | |||
ge::GeTensor inputTensor; | |||
ge::GeTensorDesc desc(data_shape, ge::Format(data_format), data_type); | |||
@@ -69,7 +69,7 @@ target_link_libraries(atc PRIVATE | |||
json | |||
runtime_compile | |||
slog | |||
mmpa | |||
static_mmpa | |||
-lrt | |||
-ldl | |||
) | |||
@@ -52,9 +52,11 @@ void CsaInteract::Init(int32_t dev_index, int64_t job_id) { | |||
if (!is_init_) { | |||
dev_index_ = dev_index; | |||
job_id_ = job_id; | |||
char *file_dir_env = std::getenv(FMK_STATUS_FILE_DIR_ENV); | |||
char file_dir_env[MMPA_MAX_PATH] = { 0x00 }; | |||
INT32 res = mmGetEnv(FMK_STATUS_FILE_DIR_ENV, file_dir_env, MMPA_MAX_PATH); | |||
string csa_path_prefix; | |||
if (file_dir_env != nullptr) { | |||
if (res == EN_OK) { | |||
csa_path_prefix = file_dir_env; | |||
} | |||
if (!csa_path_prefix.empty()) { | |||
@@ -186,21 +188,21 @@ Status CsaInteract::WriteHcomDetection(const std::string &content) { | |||
/// | |||
Status CsaInteract::WriteFile(const std::string &file_name, const std::string &content) { | |||
// if file path is not exist, then make path | |||
INT32 flags = O_WRONLY | O_TRUNC | O_CREAT; | |||
int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP); | |||
INT32 flags = M_WRONLY | O_TRUNC | M_CREAT; | |||
int32_t fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||
if (fd == EN_ERROR) { | |||
if (MakePath(file_name) != SUCCESS) { | |||
GELOGE(INTERNAL_ERROR, "csainteract create file path fail, errno is %d", errno); | |||
return INTERNAL_ERROR; | |||
} | |||
fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | S_IRGRP); | |||
fd = mmOpen2(file_name.c_str(), flags, M_IRUSR | M_IWUSR | M_UMASK_GRPREAD); | |||
if (fd == EN_ERROR) { | |||
GELOGE(INTERNAL_ERROR, "open file fail, errno is %d", errno); | |||
return INTERNAL_ERROR; | |||
} | |||
} | |||
ssize_t ret = write(fd, content.c_str(), content.length()); | |||
mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length()); | |||
if (ret == EN_ERROR) { | |||
GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno); | |||
ret = mmClose(fd); | |||
@@ -239,7 +241,7 @@ Status CsaInteract::MakePath(const std::string &file_name) { | |||
while (found != std::string::npos) { | |||
std::string pre_path = file_path.substr(0, found + 1); | |||
if (mmAccess(pre_path.c_str()) != EN_OK) { | |||
if (mmMkdir(pre_path.c_str(), S_IRWXU) != EN_OK) { | |||
if (mmMkdir(pre_path.c_str(), M_IRWXU) != EN_OK) { | |||
GELOGE(INTERNAL_ERROR, "csainteract mkdir fail, errno is %d", errno); | |||
return INTERNAL_ERROR; | |||
} | |||
@@ -85,7 +85,7 @@ Status OpsKernelManager::Initialize(const map<string, string> &options_const) { | |||
initialize_ = options; | |||
Status rst0 = plugin_manager_.InvokeAll<map<string, string> &, Status>(kInitialize, initialize_); | |||
if (rst0 == FAILED) { | |||
GELOGE(GE_OPS_GET_NO_VALID_SO); | |||
GELOGE(GE_OPS_GET_NO_VALID_SO, "There is invalid so about OpsKernelInfoStore."); | |||
return GE_OPS_GET_NO_VALID_SO; | |||
} | |||
Status rst1 = | |||
@@ -391,7 +391,7 @@ void OpsKernelManager::GetGraphOptimizerByEngine(const std::string &engine_name, | |||
continue; | |||
} | |||
if (attrs.engineName == engine_name) { | |||
GELOGI("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||
GELOGD("GetGraphOptimizerByEngine GraphOptimzer name: %s, engineName: %s", (it.first).c_str(), | |||
attrs.engineName.c_str()); | |||
graph_optimizer.push_back(it.second); | |||
} | |||
@@ -61,7 +61,7 @@ Status SessionManager::SetRtContext(SessionId session_id, rtContext_t rt_context | |||
Status SessionManager::CreateSession(const std::map<std::string, std::string> &options, SessionId &session_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionId next_session_id = 0; | |||
@@ -92,7 +92,7 @@ Status SessionManager::CreateSession(const std::map<std::string, std::string> &o | |||
Status SessionManager::DestroySession(SessionId session_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
std::lock_guard<std::mutex> lock(mutex_); | |||
@@ -119,7 +119,7 @@ Status SessionManager::DestroySession(SessionId session_id) { | |||
Status SessionManager::GetVariable(SessionId session_id, const std::string &name, Tensor &val) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -143,7 +143,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G | |||
Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const Graph &graph, | |||
const std::map<std::string, std::string> &options) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -173,7 +173,7 @@ Status SessionManager::AddGraph(SessionId session_id, uint32_t graph_id, const G | |||
Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, const Graph &graph, | |||
const std::map<std::string, std::string> &options) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -203,7 +203,7 @@ Status SessionManager::AddGraphWithCopy(SessionId session_id, uint32_t graph_id, | |||
Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const std::vector<Tensor> &inputs, | |||
std::vector<Tensor> &outputs) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -221,7 +221,7 @@ Status SessionManager::RunGraph(SessionId session_id, uint32_t graph_id, const s | |||
Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -239,7 +239,7 @@ Status SessionManager::RemoveGraph(SessionId session_id, uint32_t graph_id) { | |||
bool SessionManager::HasSession(SessionId session_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return false; | |||
} | |||
return session_manager_map_.find(session_id) != session_manager_map_.end(); | |||
@@ -247,7 +247,7 @@ bool SessionManager::HasSession(SessionId session_id) { | |||
Status SessionManager::GetNextSessionId(SessionId &next_session_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
static SessionId session_id = 0; | |||
@@ -260,7 +260,7 @@ Status SessionManager::RegisterCallBackFunc( | |||
SessionId session_id, const std::string &key, | |||
const std::function<Status(uint32_t, const std::map<std::string, ge::Tensor> &)> &callback) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -278,7 +278,7 @@ Status SessionManager::RegisterCallBackFunc( | |||
Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const std::vector<InputTensorInfo> &inputs) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -297,7 +297,7 @@ Status SessionManager::BuildGraph(SessionId session_id, uint32_t graph_id, const | |||
Status SessionManager::RunGraphAsync(SessionId session_id, uint32_t graph_id, | |||
const std::vector<InputTensorInfo> &inputs, RunAsyncCallback callback) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -317,7 +317,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std: | |||
std::vector<Tensor> &var_values) { | |||
// step 0: init session manager | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return GE_SESSION_MANAGER_NOT_INIT; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -383,7 +383,7 @@ Status SessionManager::GetVariables(SessionId session_id, const std::vector<std: | |||
bool SessionManager::IsGraphNeedRebuild(SessionId session_id, uint32_t graph_id) { | |||
if (!init_flag_) { | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT); | |||
GELOGE(GE_SESSION_MANAGER_NOT_INIT, "Session manager is not initialized."); | |||
return true; | |||
} | |||
SessionPtr innerSession = nullptr; | |||
@@ -44,8 +44,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY SingleOp::~SingleOp() { | |||
delete task; | |||
task = nullptr; | |||
} | |||
GELOGI("SingleOp destory sessionId = %lu", aicpu_session_id_); | |||
ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); | |||
} | |||
Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs) { | |||
@@ -59,7 +57,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std:: | |||
for (size_t i = 0; i < num_inputs; ++i) { | |||
// preventing from read out of bound | |||
size_t aligned_size = GetAlignedSize(inputs[i].length); | |||
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%lu", | |||
GELOGI("Input [%zu], aligned_size:%zu, inputs.length:%lu, input_sizes_:%zu", | |||
i, aligned_size, inputs[i].length, input_sizes_[i]); | |||
if (aligned_size < input_sizes_[i]) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Input size mismatch. index = %zu, model expect %zu," | |||
@@ -77,7 +75,7 @@ Status SingleOp::ValidateArgs(const std::vector<DataBuffer> &inputs, const std:: | |||
for (size_t i = 0; i < num_outputs; ++i) { | |||
// preventing from write out of bound | |||
size_t aligned_size = GetAlignedSize(outputs[i].length); | |||
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%lu", | |||
GELOGI("Output [%zu], aligned_size:%zu, outputs.length:%lu, output_sizes_:%zu", | |||
i, aligned_size, outputs[i].length, output_sizes_[i]); | |||
if (aligned_size < output_sizes_[i]) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "Output size mismatch. index = %zu, model expect %zu," | |||
@@ -143,7 +141,7 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve | |||
GE_CHECK_NOTNULL(task_io_addr); | |||
auto io_addr = reinterpret_cast<uint64_t *>(const_cast<uintptr_t *>(task_io_addr)); | |||
for (size_t i = 0; i < io_addr_num; ++i) { | |||
io_addr[i] = reinterpret_cast<uintptr_t>(args_[i]); | |||
io_addr[i] = static_cast<uintptr_t>(args_[i]); | |||
} | |||
} else { | |||
GELOGW("Only TF_kernel aicpu and aicpu_CC are supported, but got %u", task->GetOpTaskType()); | |||
@@ -180,17 +178,11 @@ void SingleOp::SetStream(rtStream_t stream) { | |||
stream_ = stream; | |||
} | |||
void SingleOp::SetSessionID(uint64_t session_id) { | |||
aicpu_session_id_ = session_id; | |||
} | |||
DynamicSingleOp::DynamicSingleOp(uintptr_t resource_id, std::mutex *stream_mutex, rtStream_t stream) | |||
: resource_id_(resource_id), stream_mutex_(stream_mutex), stream_(stream) { | |||
} | |||
DynamicSingleOp::~DynamicSingleOp() { | |||
GELOGI("DynamicSingleOp destory sessionId = %lu", aicpu_session_id_); | |||
ModelManager::GetInstance()->DestroyAicpuSession(aicpu_session_id_); | |||
} | |||
Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc, | |||
@@ -299,8 +291,4 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc, | |||
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; | |||
} | |||
} | |||
void DynamicSingleOp::SetSessionID(uint64_t session_id) { | |||
aicpu_session_id_ = session_id; | |||
} | |||
} // namespace ge |
@@ -37,7 +37,6 @@ class SingleOp { | |||
Status ExecuteAsync(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
void SetStream(rtStream_t stream); | |||
void SetSessionID(uint64_t session_id); | |||
private: | |||
Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | |||
@@ -52,7 +51,6 @@ class SingleOp { | |||
std::vector<void *> output_addr_list_; | |||
std::vector<size_t> output_sizes_; | |||
std::vector<uintptr_t> args_; | |||
uint64_t aicpu_session_id_ = 0; | |||
std::vector<OpTask *> tasks_; | |||
std::vector<std::vector<uintptr_t *>> arg_table_; | |||
@@ -66,7 +64,6 @@ class DynamicSingleOp { | |||
const std::vector<DataBuffer> &inputs, | |||
std::vector<GeTensorDesc> &output_desc, | |||
std::vector<DataBuffer> &outputs); | |||
void SetSessionID(uint64_t session_id); | |||
private: | |||
friend class SingleOpModel; | |||
@@ -89,7 +86,6 @@ class DynamicSingleOp { | |||
rtStream_t stream_ = nullptr; | |||
size_t num_inputs_ = 0; | |||
size_t num_outputs_ = 0; | |||
uint64_t aicpu_session_id_ = 0; | |||
}; | |||
} // namespace ge | |||
#endif // GE_SINGLE_OP_SINGLE_OP_H_ |
@@ -32,7 +32,7 @@ | |||
#include "task/aicpu_kernel_task_builder.h" | |||
#include "task/tbe_task_builder.h" | |||
static std::atomic<std::uint64_t> aicpu_sessionid(0); | |||
static std::atomic<std::uint64_t> aicpu_kernel_id(0); | |||
using domi::TaskDef; | |||
using std::unique_ptr; | |||
@@ -252,7 +252,9 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
auto ret = BuildCpuKernelTask(task_def.kernel(), &task); | |||
uint64_t singleop_kernel_id = aicpu_kernel_id++; | |||
GELOGI("Build singleOp CCTask, kernel_id = %lu", singleop_kernel_id); | |||
auto ret = BuildCpuKernelTask(task_def.kernel(), &task, singleop_kernel_id); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
@@ -265,14 +267,13 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { | |||
GELOGD("Building AICPU_TF task"); | |||
AiCpuTask *aicpu_task = nullptr; | |||
bool depend_compute_flag = false; | |||
uint64_t singleop_sessionid = aicpu_sessionid++; | |||
GELOGI("Build singleOp, sessionId = %lu", singleop_sessionid); | |||
auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_sessionid); | |||
uint64_t singleop_kernel_id = aicpu_kernel_id++; | |||
GELOGI("Build singleOp TfTask, kernel_id = %lu", singleop_kernel_id); | |||
auto ret = BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, false, depend_compute_flag, singleop_kernel_id); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
} | |||
single_op.tasks_.emplace_back(aicpu_task); | |||
single_op.SetSessionID(singleop_sessionid); | |||
} else { | |||
// skip | |||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
@@ -329,7 +330,7 @@ Status SingleOpModel::BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTa | |||
} | |||
Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | |||
bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id) { | |||
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id) { | |||
auto iter = op_list_.find(kernel_def.op_index()); | |||
if (iter == op_list_.end()) { | |||
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "op desc not found. op index = %u", kernel_def.op_index()); | |||
@@ -342,7 +343,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC | |||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||
} | |||
auto builder = AiCpuTaskBuilder(iter->second->GetOpDesc(), kernel_def); | |||
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, session_id); | |||
auto ret = builder.BuildTask(*aicpu_task, model_params_, dynamic_flag, kernel_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "build aicpu_TF op task failed"); | |||
return ret; | |||
@@ -353,7 +354,7 @@ Status SingleOpModel::BuildKernelExTask(const domi::KernelExDef &kernel_def, AiC | |||
return SUCCESS; | |||
} | |||
Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task) { | |||
Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id) { | |||
const auto &context = kernel_def.context(); | |||
auto iter = op_list_.find(context.op_index()); | |||
if (iter == op_list_.end()) { | |||
@@ -367,7 +368,7 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa | |||
} | |||
auto builder = AiCpuCCTaskBuilder(iter->second->GetOpDesc(), kernel_def); | |||
auto ret = builder.BuildTask(*aicpucc_task); | |||
auto ret = builder.BuildTask(*aicpucc_task, kernel_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "build aicpu_CC op task failed"); | |||
return ret; | |||
@@ -396,7 +397,9 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl | |||
} else if (kernel_type == cce::ccKernelType::AI_CPU || kernel_type == cce::ccKernelType::CUST_AI_CPU) { | |||
GELOGD("Building AICPU_CC task"); | |||
OpTask *task = nullptr; | |||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task)); | |||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||
GELOGI("Build dynamic singleOp CCTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||
GE_CHK_STATUS_RET_NOLOG(BuildCpuKernelTask(task_def.kernel(), &task, dynamic_singleop_kernel_id)); | |||
single_op.op_task_.reset(task); | |||
} else { | |||
GELOGE(ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID, | |||
@@ -430,10 +433,10 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
GELOGD("Building AICPU_TF task"); | |||
AiCpuTask *aicpu_task = nullptr; | |||
bool depend_compute_flag = false; | |||
uint64_t dynamic_singleop_sessionid = aicpu_sessionid++; | |||
GELOGI("Build dynamic singleOp, sessionId = %lu", dynamic_singleop_sessionid); | |||
uint64_t dynamic_singleop_kernel_id = aicpu_kernel_id++; | |||
GELOGI("Build dynamic singleOp TfTask, kernel_id = %lu", dynamic_singleop_kernel_id); | |||
GE_CHK_STATUS_RET_NOLOG(BuildKernelExTask(task_def.kernel_ex(), &aicpu_task, true, | |||
depend_compute_flag, dynamic_singleop_sessionid)); | |||
depend_compute_flag, dynamic_singleop_kernel_id)); | |||
if (depend_compute_flag) { | |||
if (i >= tasks.size() - 1) { | |||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "The copy task of the fourth operator was not found."); | |||
@@ -444,7 +447,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { | |||
GE_CHK_STATUS_RET_NOLOG(aicpu_task->SetMemCopyTask(copy_task_def.kernel_ex())); | |||
} | |||
single_op.op_task_.reset(aicpu_task); | |||
single_op.SetSessionID(dynamic_singleop_sessionid); | |||
} else { | |||
// skip | |||
GELOGD("Skip task type: %d", static_cast<int>(task_type)); | |||
@@ -69,8 +69,8 @@ class SingleOpModel { | |||
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); | |||
Status BuildKernelTask(const domi::KernelDef &kernel_def, TbeOpTask **task); | |||
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, | |||
bool dynamic_flag, bool& depend_compute_flag, uint64_t session_id); | |||
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task); | |||
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); | |||
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); | |||
Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); | |||
static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); | |||
@@ -46,7 +46,7 @@ Status AiCpuCCTaskBuilder::SetKernelArgs(AiCpuCCTask &task) { | |||
return SUCCESS; | |||
} | |||
Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) { | |||
auto ret = SetKernelArgs(task); | |||
if (ret != SUCCESS) { | |||
return ret; | |||
@@ -76,7 +76,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { | |||
"task def kernel_ext_info.size=%zu, but kernel_ext_info_size=%u.", | |||
kernel_ext_info.size(), kernel_ext_info_size); | |||
ret = task.SetExtInfoAndType(kernel_ext_info); | |||
ret = task.SetExtInfoAndType(kernel_ext_info, kernel_id); | |||
if (ret != SUCCESS) { | |||
GELOGE(ret, "Init ext info failed."); | |||
return ret; | |||