Merry xmas by the waytags/v1.1.0
@@ -607,7 +607,7 @@ set(INFER_SRC_LIST | |||||
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES) | ||||
############ libge_runner.so ############ | ############ libge_runner.so ############ | ||||
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>) | |||||
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS}) | |||||
target_compile_definitions(ge_runner PRIVATE | target_compile_definitions(ge_runner PRIVATE | ||||
PROTOBUF_INLINE_NOT_IN_HEADERS=0 | PROTOBUF_INLINE_NOT_IN_HEADERS=0 | ||||
@@ -648,11 +648,14 @@ target_include_directories(ge_runner PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_libraries(ge_runner | |||||
target_link_libraries(ge_runner PRIVATE | |||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
ge_memory | ge_memory | ||||
adump_server | adump_server | ||||
static_mmpa | static_mmpa | ||||
-Wl,--whole-archive | |||||
msprofiler_fwk | |||||
-Wl,--no-whole-archive | |||||
-Wl,--no-as-needed | -Wl,--no-as-needed | ||||
graph | graph | ||||
ge_common | ge_common | ||||
@@ -712,7 +715,7 @@ target_include_directories(ge_compiler PRIVATE | |||||
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain | ||||
) | ) | ||||
target_link_libraries(ge_compiler | |||||
target_link_libraries(ge_compiler PRIVATE | |||||
$<BUILD_INTERFACE:intf_pub> | $<BUILD_INTERFACE:intf_pub> | ||||
ge_memory | ge_memory | ||||
static_mmpa | static_mmpa | ||||
@@ -766,7 +769,14 @@ target_link_options(opensrc_ascendcl PRIVATE | |||||
-Wl,--allow-multiple-definition | -Wl,--allow-multiple-definition | ||||
-Wl,-z,muldefs | -Wl,-z,muldefs | ||||
-Wl,-Bsymbolic | -Wl,-Bsymbolic | ||||
-Wl,--exclude-libs,ALL | |||||
-Wl,--exclude-libs,libascend_protobuf.a | |||||
-Wl,--exclude-libs,libge_executor.a | |||||
-Wl,--exclude-libs,libge_common.a | |||||
-Wl,--exclude-libs,libgraph.a | |||||
-Wl,--exclude-libs,libmmpa.a | |||||
-Wl,--exclude-libs,libregister.a | |||||
-Wl,--exclude-libs,liberror_manager.a | |||||
-Wl,--exclude-libs,libadump_server.a | |||||
) | ) | ||||
target_link_libraries(opensrc_ascendcl PRIVATE | target_link_libraries(opensrc_ascendcl PRIVATE | ||||
-Wl,--whole-archive | -Wl,--whole-archive | ||||
@@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) { | |||||
for (auto dim : output_descs.at(i).GetShape().GetDims()) { | for (auto dim : output_descs.at(i).GetShape().GetDims()) { | ||||
output.mutable_shape()->add_dim(dim); | output.mutable_shape()->add_dim(dim); | ||||
} | } | ||||
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) { | |||||
output.mutable_origin_shape()->add_dim(dim); | |||||
} | |||||
int64_t output_size = 0; | int64_t output_size = 0; | ||||
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) { | ||||
GELOGE(PARAM_INVALID, "Get output size filed"); | GELOGE(PARAM_INVALID, "Get output size filed"); | ||||
@@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) { | |||||
for (auto dim : input_descs.at(i).GetShape().GetDims()) { | for (auto dim : input_descs.at(i).GetShape().GetDims()) { | ||||
input.mutable_shape()->add_dim(dim); | input.mutable_shape()->add_dim(dim); | ||||
} | } | ||||
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) { | |||||
input.mutable_origin_shape()->add_dim(dim); | |||||
} | |||||
int64_t input_size = 0; | int64_t input_size = 0; | ||||
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) { | ||||
GELOGE(PARAM_INVALID, "Get output size filed"); | GELOGE(PARAM_INVALID, "Get output size filed"); | ||||
@@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() { | |||||
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info); | ||||
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(), | ||||
dump_path.c_str()); | dump_path.c_str()); | ||||
uint32_t task_id = 0; | |||||
uint32_t stream_id = 0; | |||||
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret); | |||||
} | |||||
aicpu::dump::Task task; | aicpu::dump::Task task; | ||||
task.set_task_id(task_id); | |||||
task.set_stream_id(stream_id); | |||||
task.mutable_op()->set_op_name(op_desc_->GetName()); | task.mutable_op()->set_op_name(op_desc_->GetName()); | ||||
task.mutable_op()->set_op_type(op_desc_->GetType()); | task.mutable_op()->set_op_type(op_desc_->GetType()); | ||||
if (dump_properties_.GetDumpMode() == kDumpOutput) { | if (dump_properties_.GetDumpMode() == kDumpOutput) { | ||||
@@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) { | |||||
void TBEPluginManager::LoadCustomOpLib() { | void TBEPluginManager::LoadCustomOpLib() { | ||||
LoadPluginSo(options_); | LoadPluginSo(options_); | ||||
std::string fmk_type = std::to_string(domi::TENSORFLOW); | |||||
auto it = options_.find(ge::FRAMEWORK_TYPE); | |||||
if (it != options_.end()) { | |||||
fmk_type = it->second; | |||||
} | |||||
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas; | ||||
GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | GELOGI("The size of registration_datas is: %zu", registration_datas.size()); | ||||
for (OpRegistrationData reg_data : registration_datas) { | for (OpRegistrationData reg_data : registration_datas) { | ||||
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||||
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||||
domi::OpRegistry::Instance()->Register(reg_data); | |||||
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) { | |||||
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(), | |||||
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str()); | |||||
(void)domi::OpRegistry::Instance()->Register(reg_data); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { | |||||
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { | if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { | ||||
GELOGW("Msprof ctrl callback is exist, just ignore it."); | GELOGW("Msprof ctrl callback is exist, just ignore it."); | ||||
} else { | } else { | ||||
GELOGI("GE register Msprof ctrl callback."); | |||||
ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); | ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); | ||||
} | } | ||||
return ge::SUCCESS; | return ge::SUCCESS; | ||||
@@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { | |||||
return ge::PARAM_INVALID; | return ge::PARAM_INVALID; | ||||
} | } | ||||
// Pass MsprofSetDeviceCallback to runtime | // Pass MsprofSetDeviceCallback to runtime | ||||
GELOGI("GE pass setdevice callback to runtime."); | |||||
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); | ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); | ||||
if (rt_ret != ge::SUCCESS) { | if (rt_ret != ge::SUCCESS) { | ||||
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); | GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); | ||||
@@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||||
if (type != kProfCommandhandleFinalize) { | if (type != kProfCommandhandleFinalize) { | ||||
GE_CHECK_NOTNULL(data); | GE_CHECK_NOTNULL(data); | ||||
} | } | ||||
ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data; | |||||
ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data); | |||||
auto iter = kProfCommandTypeMap.find(type); | auto iter = kProfCommandTypeMap.find(type); | ||||
if (iter == kProfCommandTypeMap.end()) { | if (iter == kProfCommandTypeMap.end()) { | ||||
GELOGW("The prof comand type is invalid."); | GELOGW("The prof comand type is invalid."); | ||||
@@ -183,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le | |||||
if (type != kProfCommandhandleFinalize) { | if (type != kProfCommandhandleFinalize) { | ||||
command.module_index = prof_config_param->profSwitch; | command.module_index = prof_config_param->profSwitch; | ||||
} | } | ||||
GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index); | |||||
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(), | |||||
command.module_index); | |||||
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) { | ||||
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str()); | ||||
} | } | ||||
@@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe"; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
ProfilingManager::ProfilingManager() : is_load_profiling_(false), | |||||
is_execute_profiling_(false), | |||||
is_training_trace_(false), | |||||
subscribe_count_(0) { | |||||
ProfilingManager::ProfilingManager() | |||||
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) { | |||||
prof_cb_.msprofCtrlCallback = nullptr; | prof_cb_.msprofCtrlCallback = nullptr; | ||||
prof_cb_.msprofReporterCallback = nullptr; | prof_cb_.msprofReporterCallback = nullptr; | ||||
} | } | ||||
@@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt | |||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
is_execute_profiling_ = true; | is_execute_profiling_ = true; | ||||
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), | |||||
prof_conf.options, options.profiling_options.c_str()); | |||||
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options, | |||||
options.profiling_options.c_str()); | |||||
} else { | } else { | ||||
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); | (void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH); | ||||
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); | (void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX); | ||||
@@ -143,6 +141,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) { | |||||
} | } | ||||
try { | try { | ||||
Json prof_options = Json::parse(options); | Json prof_options = Json::parse(options); | ||||
if (options.find(kTrainingTrace) == std::string::npos) { | |||||
return ge::SUCCESS; | |||||
} | |||||
const std::string training_trace = prof_options[kTrainingTrace]; | const std::string training_trace = prof_options[kTrainingTrace]; | ||||
if (training_trace.empty()) { | if (training_trace.empty()) { | ||||
GELOGI("Training trace will not take effect."); | GELOGI("Training trace will not take effect."); | ||||
@@ -802,32 +803,46 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP | |||||
if (!fp_point_.empty() && !bp_point_.empty()) { | if (!fp_point_.empty() && !bp_point_.empty()) { | ||||
fp_point = fp_point_; | fp_point = fp_point_; | ||||
bp_point = bp_point_; | bp_point = bp_point_; | ||||
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str()); | |||||
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), | |||||
fp_point.c_str()); | |||||
return; | return; | ||||
} | } | ||||
// ProfApi mode and training trace is set | // ProfApi mode and training trace is set | ||||
try { | |||||
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; | |||||
// Parse options first | |||||
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 }; | |||||
bool is_profiling_valid = false; | |||||
std::string profiling_options; | |||||
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS && | |||||
!profiling_options.empty()) { | |||||
is_profiling_valid = true; | |||||
} else { | |||||
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); | INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX); | ||||
if (ret != EN_OK) { | if (ret != EN_OK) { | ||||
GELOGI("PROFILING_OPTIONS env is not exist."); | GELOGI("PROFILING_OPTIONS env is not exist."); | ||||
return; | return; | ||||
} | } | ||||
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); | GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options); | ||||
Json prof_options = Json::parse(env_profiling_options); | |||||
profiling_options = env_profiling_options; | |||||
is_profiling_valid = true; | |||||
} | |||||
if (is_profiling_valid) { | |||||
try { | |||||
Json prof_options = Json::parse(profiling_options); | |||||
fp_point_ = prof_options[kFpPoint]; | |||||
bp_point_ = prof_options[kBpPoint]; | |||||
fp_point_ = prof_options[kFpPoint]; | |||||
bp_point_ = prof_options[kBpPoint]; | |||||
fp_point = fp_point_; | |||||
bp_point = bp_point_; | |||||
if (!fp_point_.empty() && !bp_point_.empty()) { | |||||
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||||
fp_point = fp_point_; | |||||
bp_point = bp_point_; | |||||
if (!fp_point_.empty() && !bp_point_.empty()) { | |||||
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str()); | |||||
} | |||||
} catch (...) { | |||||
GELOGW("Json prof options is invalid."); | |||||
return; | |||||
} | } | ||||
} catch (...) { | |||||
GELOGE(FAILED, "Json prof options is invalid."); | |||||
return; | |||||
} | } | ||||
return; | return; | ||||
} | } | ||||
@@ -36,21 +36,21 @@ using Json = nlohmann::json; | |||||
namespace { | namespace { | ||||
const std::string GE_PROFILING_MODULE = "Framework"; | const std::string GE_PROFILING_MODULE = "Framework"; | ||||
// DataTypeConfig MASK | // DataTypeConfig MASK | ||||
#define PROF_ACL_API_MASK 0x0001 | |||||
#define PROF_TASK_TIME_MASK 0x0002 | |||||
#define PROF_AICORE_METRICS_MASK 0x0004 | |||||
#define PROF_AICPU_TRACE_MASK 0x0008 | |||||
#define PROF_MODEL_EXECUTE_MASK 0x0010 | |||||
#define PROF_RUNTIME_API_MASK 0x0020 | |||||
#define PROF_RUNTIME_TRACE_MASK 0x0040 | |||||
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080 | |||||
#define PROF_SCHEDULE_TRACE_MASK 0x0100 | |||||
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200 | |||||
#define PROF_SUBTASK_TIME_MASK 0x0400 | |||||
#define PROF_TRAINING_TRACE_MASK 0x0800 | |||||
#define PROF_HCCL_TRACE_MASK 0x1000 | |||||
#define PROF_DATA_PROCESS_MASK 0x2000 | |||||
#define PROF_MODEL_LOAD_MASK 0x8000000000000000 | |||||
const uint64_t PROF_ACL_API_MASK = 0x0001; | |||||
const uint64_t PROF_TASK_TIME_MASK = 0x0002; | |||||
const uint64_t PROF_AICORE_METRICS_MASK = 0x0004; | |||||
const uint64_t PROF_AICPU_TRACE_MASK = 0x0008; | |||||
const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010; | |||||
const uint64_t PROF_RUNTIME_API_MASK = 0x0020; | |||||
const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040; | |||||
const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080; | |||||
const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100; | |||||
const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200; | |||||
const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400; | |||||
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800; | |||||
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000; | |||||
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000; | |||||
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000; | |||||
} // namespace | } // namespace | ||||
namespace ge { | namespace ge { | ||||
@@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager { | |||||
bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | bool ProfilingTrainingTraceOn() const { return is_training_trace_; } | ||||
bool ProfilingModelLoadOn() const { return is_load_profiling_; } | bool ProfilingModelLoadOn() const { return is_load_profiling_; } | ||||
bool ProfilingModelExecuteOn() const; | bool ProfilingModelExecuteOn() const; | ||||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env | |||||
// is_execute_profiling_ only used by ge option and env | |||||
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } | |||||
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | ||||
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info); | ||||
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info, | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||
@@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims, | |||||
namespace ge { | namespace ge { | ||||
bool GeExecutor::isInit_ = false; | bool GeExecutor::isInit_ = false; | ||||
class ModelListenerAdapter : public ModelListener { | |||||
public: | |||||
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode, | |||||
std::vector<ge::OutputTensorInfo> &outputs) { | |||||
if (listener == nullptr) { | |||||
GELOGE(ge::FAILED, "listener is null."); | |||||
return FAILED; | |||||
} | |||||
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs); | |||||
} | |||||
std::shared_ptr<ge::ModelListener> listener; | |||||
}; | |||||
static void InitOpsProtoManger() { | static void InitOpsProtoManger() { | ||||
string opsproto_path; | string opsproto_path; | ||||
@@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
// Load model | |||||
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, | |||||
int32_t priority, std::shared_ptr<ge::ModelListener> listener) { | |||||
GELOGI("load model offline begin."); | |||||
if (!isInit_) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||||
} | |||||
string filePath = RealPath(path.c_str()); | |||||
if (filePath.empty()) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, | |||||
"File path is invalid. please check your text file '%s'.", path.c_str()); | |||||
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||||
} | |||||
std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>(); | |||||
if (listener_adapter == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | |||||
listener_adapter->listener = listener; | |||||
Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed"); | |||||
return ACL_ERROR_GE_LOAD_MODEL; | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data, | |||||
std::shared_ptr<ge::ModelListener> listener) { | |||||
GELOGI("Load model begin."); | |||||
if (!isInit_) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||||
} | |||||
std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>(); | |||||
if (listener_adapter == nullptr) { | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!"); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | |||||
listener_adapter->listener = listener; | |||||
Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id); | |||||
if (ret != SUCCESS) { | |||||
GELOGE(ret, "[GeExecutor] LoadModel failed."); | |||||
return ACL_ERROR_GE_LOAD_MODEL; | |||||
} | |||||
return ret; | |||||
} | |||||
Status GeExecutor::UnloadModel(uint32_t model_id) { | Status GeExecutor::UnloadModel(uint32_t model_id) { | ||||
GELOGD("unload model %u begin.", model_id); | GELOGD("unload model %u begin.", model_id); | ||||
if (!isInit_) { | if (!isInit_) { | ||||
@@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) { | |||||
GELOGI("run model begin."); | |||||
if (!isInit_) { | |||||
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!"); | |||||
return ACL_ERROR_GE_EXEC_NOT_INIT; | |||||
} | |||||
InputData inputs; | |||||
GetDomiInputData(input_data, inputs); | |||||
OutputData outputs; | |||||
GetDomiOutputData(output_data, outputs); | |||||
return GraphExecutor::DataInput(inputs, outputs); | |||||
} | |||||
// Get input and output descriptor | // Get input and output descriptor | ||||
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) { | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||
@@ -39,7 +39,7 @@ namespace { | |||||
} \ | } \ | ||||
ge_tensor = MakeShared<GeTensor>(out_desc); \ | ge_tensor = MakeShared<GeTensor>(out_desc); \ | ||||
GE_CHECK_NOTNULL(ge_tensor); \ | GE_CHECK_NOTNULL(ge_tensor); \ | ||||
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ | |||||
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\ | |||||
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ | if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \ | ||||
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ | GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \ | ||||
return MEMALLOC_FAILED; \ | return MEMALLOC_FAILED; \ | ||||
@@ -50,8 +50,7 @@ namespace { | |||||
} else { \ | } else { \ | ||||
ge_tensor = outputs[i]; \ | ge_tensor = outputs[i]; \ | ||||
GE_CHECK_NOTNULL(ge_tensor); \ | GE_CHECK_NOTNULL(ge_tensor); \ | ||||
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \ | |||||
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \ | |||||
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \ | |||||
} \ | } \ | ||||
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ | auto tensor = TensorAdapter::AsTensor(*ge_tensor); \ | ||||
auto tensor_name = op_desc->GetOutputNameByIndex(i); \ | auto tensor_name = op_desc->GetOutputNameByIndex(i); \ | ||||
@@ -563,6 +563,19 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr | |||||
GE_CHECK_NOTNULL(ge_root_model); | GE_CHECK_NOTNULL(ge_root_model); | ||||
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); | ||||
ModelHelper model_helper; | |||||
string model_name = ""; | |||||
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), | |||||
model_name); | |||||
if (name_ret != SUCCESS) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"}); | |||||
GELOGE(FAILED, "Get model_name failed. Param --output is invalid."); | |||||
return PARAM_INVALID; | |||||
} | |||||
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel(); | |||||
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()]; | |||||
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null"); | |||||
ge_model->SetName(model_name); | |||||
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); | ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Save model failed"); | GELOGE(ret, "Save model failed"); | ||||
@@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() { | |||||
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset()); | ||||
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); | ||||
if (mem_assigner->GetP2PMemOffset() > 0) { | |||||
if (mem_assigner->GetP2PMemOffset() >= 0) { | |||||
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); | MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset()); | ||||
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); | memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset); | ||||
} | } | ||||
@@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap | |||||
} | } | ||||
} | } | ||||
bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) { | |||||
bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) { | |||||
if (comp_graph == nullptr) { | if (comp_graph == nullptr) { | ||||
return false; | return false; | ||||
} | } | ||||
std::set<int64_t> stream_set; | std::set<int64_t> stream_set; | ||||
std::set<std::string> label_set; | |||||
for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) { | for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) { | ||||
GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue); | GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue); | ||||
int64_t stream_id = cur_node->GetOpDesc()->GetStreamId(); | int64_t stream_id = cur_node->GetOpDesc()->GetStreamId(); | ||||
if (stream_id == kInvalidStream) { | if (stream_id == kInvalidStream) { | ||||
continue; | continue; | ||||
} | } | ||||
GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(), | |||||
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); | |||||
stream_set.insert(stream_id); | stream_set.insert(stream_id); | ||||
std::string batch_label; | |||||
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) { | |||||
label_set.insert(batch_label); | |||||
} else { | |||||
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(), | |||||
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id); | |||||
continue; | |||||
} | |||||
GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(), | |||||
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize()); | |||||
} | } | ||||
if (stream_set.size() > 1) { | |||||
GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.", | |||||
if (stream_set.size() > 1 || label_set.size() > 1) { | |||||
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.", | |||||
comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size()); | comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size()); | ||||
return false; | return false; | ||||
} | } | ||||
if (!label_set.empty()) { | |||||
(void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin()); | |||||
} | |||||
return true; | return true; | ||||
} | } | ||||
@@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||||
continue; | continue; | ||||
} | } | ||||
if (!IsSameStreamId(subgraph)) { | |||||
GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str()); | |||||
if (!IsSameStreamIdOrBatchLabel(subgraph)) { | |||||
GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str()); | |||||
continue; | continue; | ||||
} | } | ||||
OpDescPtr op_desc = nodes.at(0)->GetOpDesc(); | OpDescPtr op_desc = nodes.at(0)->GetOpDesc(); | ||||
@@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
run_context.stream = run_context.graphStreamList[stream_id]; | run_context.stream = run_context.graphStreamList[stream_id]; | ||||
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.", | |||||
subgraph->GetName().c_str(), engine_name.c_str(), stream_id, | |||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream))); | |||||
std::string batch_label; | |||||
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label); | |||||
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, " | |||||
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id, | |||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str()); | |||||
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) { | for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) { | ||||
GE_CHECK_NOTNULL(*iter); | GE_CHECK_NOTNULL(*iter); | ||||
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context); | Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context); | ||||
@@ -41,7 +41,7 @@ class StreamGraphOptimizer { | |||||
private: | private: | ||||
void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map); | void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map); | ||||
bool IsSameStreamId(const ComputeGraphPtr &comp_graph); | |||||
bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph); | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_ | #endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_ |
@@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_ | |||||
continue; | continue; | ||||
} | } | ||||
string op_type = op_desc->GetType(); | string op_type = op_desc->GetType(); | ||||
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) { | |||||
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) { | |||||
continuous_op_lists.emplace_back(vector<OpDescPtr>()); | continuous_op_lists.emplace_back(vector<OpDescPtr>()); | ||||
} else { | } else { | ||||
continuous_op_lists.back().emplace_back(op_desc); | continuous_op_lists.back().emplace_back(op_desc); | ||||
@@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||||
ModelData &model_data) { | ModelData &model_data) { | ||||
Status ret; | Status ret; | ||||
if (!CheckInputPathValid(path)) { | if (!CheckInputPathValid(path)) { | ||||
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||||
return GE_EXEC_MODEL_PATH_INVALID; | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str()); | |||||
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID; | |||||
} | } | ||||
GELOGI("Load model begin, model path is: %s", path.c_str()); | GELOGI("Load model begin, model path is: %s", path.c_str()); | ||||
if (!key_path.empty() && !CheckInputPathValid(key_path)) { | if (!key_path.empty() && !CheckInputPathValid(key_path)) { | ||||
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||||
return GE_EXEC_MODEL_KEY_PATH_INVALID; | |||||
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str()); | |||||
return ACL_ERROR_GE_PARAM_INVALID; | |||||
} | } | ||||
ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data); | ||||
@@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
///
/// @ingroup ge
/// @brief Read a model from file via LoadDataFromFile and hand it to LoadModel.
///        The buffer held in model_data.model_data is released with delete[] (as a char array)
///        on every exit path, whether loading succeeded or not.
/// @param [in] path: model file path, forwarded to LoadDataFromFile.
/// @param [in] key_path: key file path, forwarded to LoadDataFromFile.
/// @param [in] priority: priority, forwarded to LoadDataFromFile.
/// @param [in] listener: model listener, forwarded to LoadModel.
/// @param [in/out] model_id: model id, forwarded to LoadModel.
/// @return Status of the first failing step, otherwise the result of LoadModel.
///
Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
                                      const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
  ModelData model_data;

  // Single place that frees the file buffer; safe to call when nothing was allocated.
  const auto release_buffer = [&model_data]() {
    if (model_data.model_data != nullptr) {
      delete[] static_cast<char *>(model_data.model_data);
      model_data.model_data = nullptr;
    }
  };

  Status ret = LoadDataFromFile(path, key_path, priority, model_data);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
    release_buffer();
    return ret;
  }

  ret = LoadModel(model_data, listener, model_id);
  if (ret != SUCCESS) {
    GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
  }

  // The buffer is freed here on both the success and the failure path of LoadModel.
  release_buffer();
  return ret;
}
///
/// @ingroup ge
/// @brief Open device 0, load the model offline through ModelManager and start it.
/// @param [in] model_data: model data passed to ModelManager::LoadModelOffline.
/// @param [in] listener: model listener passed to ModelManager::LoadModelOffline.
/// @param [in/out] model_id: model id used for loading, starting and (on start failure) unloading.
/// @return SUCCESS when the model was loaded and started, otherwise the failing step's status.
///
Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
                              uint32_t &model_id) {
  GELOGI("Load model begin, model_id:%u.", model_id);

  // For GeOp, Open Device 0 here.
  GE_CHK_RT_RET(rtSetDevice(0));

  auto manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(manager);

  const Status load_status = manager->LoadModelOffline(model_id, model_data, listener);
  if (load_status != SUCCESS) {
    // Loading failed: give the device back before reporting the error.
    GE_CHK_RT(rtDeviceReset(0));
    GELOGE(load_status, "LoadModel: Load failed.");
    return load_status;
  }

  const Status start_status = manager->Start(model_id);
  if (start_status == SUCCESS) {
    GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
    return SUCCESS;
  }

  // Start failed: try to unload the model that was just loaded.
  // NOTE(review): unlike the load-failure path, the device is not reset here - confirm this is intended.
  if (manager->Unload(model_id) != SUCCESS) {
    GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
  }
  GELOGE(start_status, "LoadModel: Start failed.");
  return start_status;
}
Status GraphLoader::CommandHandle(const Command &command) { | Status GraphLoader::CommandHandle(const Command &command) { | ||||
try { | try { | ||||
auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
@@ -225,16 +168,16 @@ Status GraphLoader::CommandHandle(const Command &command) { | |||||
} | } | ||||
Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, | Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr, | ||||
size_t memsize, void *weight_ptr, size_t weightsize) { | |||||
size_t mem_size, void *weight_ptr, size_t weight_size) { | |||||
GELOGI("Load model begin, model_id:%u.", model_id); | GELOGI("Load model begin, model_id:%u.", model_id); | ||||
// For ACL, Open Device from App. | // For ACL, Open Device from App. | ||||
auto model_manager = ModelManager::GetInstance(); | auto model_manager = ModelManager::GetInstance(); | ||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
Status ret = model_manager->LoadModelOffline( | Status ret = model_manager->LoadModelOffline( | ||||
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize); | |||||
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size); | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Load model failed, model_id:%u.", model_id); | |||||
return ret; | |||||
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id); | |||||
return ACL_ERROR_GE_LOAD_MODEL; | |||||
} | } | ||||
GELOGI("Load model success, model_id:%u.", model_id); | GELOGI("Load model success, model_id:%u.", model_id); | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -259,8 +202,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da | |||||
GE_CHECK_NOTNULL(model_manager); | GE_CHECK_NOTNULL(model_manager); | ||||
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id); | |||||
return ret; | |||||
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id); | |||||
return ACL_ERROR_GE_LOAD_MODEL; | |||||
} | } | ||||
GELOGI("Load model with queue success, model_id:%u.", model_id); | GELOGI("Load model with queue success, model_id:%u.", model_id); | ||||
@@ -44,12 +44,6 @@ class GraphLoader { | |||||
static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size); | static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size); | ||||
static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener, | |||||
uint32_t &model_id); | |||||
static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority, | |||||
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id); | |||||
static Status CommandHandle(const Command &command); | static Status CommandHandle(const Command &command); | ||||
static Status GetMemoryInfo(int64_t &free); | static Status GetMemoryInfo(int64_t &free); | ||||
@@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis | |||||
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | ||||
output.mutable_shape()->add_dim(dim); | output.mutable_shape()->add_dim(dim); | ||||
} | } | ||||
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { | |||||
output.mutable_origin_shape()->add_dim(dim); | |||||
} | |||||
int64_t output_size = 0; | int64_t output_size = 0; | ||||
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { | if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) { | ||||
GELOGE(PARAM_INVALID, "Get output size filed"); | GELOGE(PARAM_INVALID, "Get output size filed"); | ||||
@@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor | |||||
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | for (auto dim : tensor_descs.at(index).GetShape().GetDims()) { | ||||
input.mutable_shape()->add_dim(dim); | input.mutable_shape()->add_dim(dim); | ||||
} | } | ||||
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) { | |||||
input.mutable_origin_shape()->add_dim(dim); | |||||
} | |||||
int64_t input_size = 0; | int64_t input_size = 0; | ||||
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) { | ||||
GELOGI("Get aipp input size according to attr is %ld", input_size); | GELOGI("Get aipp input size according to attr is %ld", input_size); | ||||
@@ -289,8 +289,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh | |||||
if (weight_ptr == nullptr) { | if (weight_ptr == nullptr) { | ||||
weights_mem_base_ = MallocWeightsMem(weights_size); | weights_mem_base_ = MallocWeightsMem(weights_size); | ||||
if (weights_mem_base_ == nullptr) { | if (weights_mem_base_ == nullptr) { | ||||
GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size); | |||||
return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc weight memory failed. size: %zu", weights_size); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
is_inner_weight_base_ = true; | is_inner_weight_base_ = true; | ||||
} | } | ||||
@@ -307,8 +307,8 @@ Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weigh | |||||
Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | ||||
if (is_feature_map_mem_has_inited_) { | if (is_feature_map_mem_has_inited_) { | ||||
GELOGE(FAILED, "call InitFeatureMapMem more than once ."); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "call InitFeatureMapMem more than once ."); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
is_feature_map_mem_has_inited_ = true; | is_feature_map_mem_has_inited_ = true; | ||||
@@ -316,8 +316,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size; | ||||
if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) { | ||||
GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||||
return FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize()); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
mem_base_ = static_cast<uint8_t *>(dev_ptr); | mem_base_ = static_cast<uint8_t *>(dev_ptr); | ||||
@@ -327,8 +327,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
if (TotalMemSize() && mem_base_ == nullptr) { | if (TotalMemSize() && mem_base_ == nullptr) { | ||||
mem_base_ = MallocFeatureMapMem(data_size); | mem_base_ = MallocFeatureMapMem(data_size); | ||||
if (mem_base_ == nullptr) { | if (mem_base_ == nullptr) { | ||||
GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size); | |||||
return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc feature map memory failed. size: %zu", data_size); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", | GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", | ||||
runtime_param_.graph_id, mem_base_, data_size); | runtime_param_.graph_id, mem_base_, data_size); | ||||
@@ -343,8 +343,8 @@ Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) { | |||||
if (p2p_data_size != 0) { | if (p2p_data_size != 0) { | ||||
p2p_mem_base_ = MallocP2PMem(p2p_data_size); | p2p_mem_base_ = MallocP2PMem(p2p_data_size); | ||||
if (p2p_mem_base_ == nullptr) { | if (p2p_mem_base_ == nullptr) { | ||||
GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size); | |||||
return GE_EXEC_ALLOC_P2P_MEM_FAILED; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc p2p memory failed,size: %zu", p2p_data_size); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id, | ||||
p2p_mem_base_, p2p_data_size); | p2p_mem_base_, p2p_data_size); | ||||
@@ -710,6 +710,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
} | } | ||||
// collect profiling for ge | // collect profiling for ge | ||||
GE_CHK_STATUS_RET(InitModelProfile(), "Init model profile failed"); | |||||
auto &profiling_manager = ProfilingManager::Instance(); | auto &profiling_manager = ProfilingManager::Instance(); | ||||
if (profiling_manager.ProfilingModelLoadOn()) { | if (profiling_manager.ProfilingModelLoadOn()) { | ||||
Status p_ret = ReportProfilingData(); | Status p_ret = ReportProfilingData(); | ||||
@@ -970,7 +971,7 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
uint32_t parent_index = 0; // Ignore subgraph Data Node. | uint32_t parent_index = 0; // Ignore subgraph Data Node. | ||||
if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | if (AttrUtils::GetInt(op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { | ||||
GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); | GELOGI("Init zero copy by subgraph Data node: %s.", op_desc->GetName().c_str()); | ||||
return InitInputBatchLabel(node); | |||||
return SUCCESS; | |||||
} | } | ||||
data_op_list_.push_back(op_desc); | data_op_list_.push_back(op_desc); | ||||
@@ -1011,10 +1012,6 @@ Status DavinciModel::InitDataOp(const NodePtr &node, uint32_t &data_op_index, ma | |||||
} | } | ||||
data_op_index++; | data_op_index++; | ||||
if (InitInputZeroCopyNodes(node) != SUCCESS) { | |||||
GELOGE(PARAM_INVALID, "Input zero copy nodes init failed!"); | |||||
return PARAM_INVALID; | |||||
} | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1036,39 +1033,6 @@ void DavinciModel::AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_inde | |||||
} | } | ||||
} | } | ||||
///
/// @ingroup ge
/// @brief Record the batch label of every op that consumes the output of a Data op.
///        Ops without a batch label attribute are recorded with kDefaultBatchLable.
///        An op id that is already recorded keeps its existing label.
/// @param [in] node: Data Op.
/// @return SUCCESS, or FAILED when the out data anchor or a consumer's op desc is null.
///
Status DavinciModel::InitInputZeroCopyNodes(const NodePtr &node) {
  auto data_out_anchor = node->GetOutDataAnchor(kDataIndex);
  if (data_out_anchor == nullptr) {
    GELOGE(FAILED, "Out data anchor is nullptr");
    return FAILED;
  }

  for (auto &consumer_anchor : data_out_anchor->GetPeerInDataAnchors()) {
    auto consumer_desc = consumer_anchor->GetOwnerNode()->GetOpDesc();
    if (consumer_desc == nullptr) {
      GELOGE(FAILED, "Op desc is nullptr");
      return FAILED;
    }

    // A missing or empty attribute falls back to the default label.
    string label;
    (void)ge::AttrUtils::GetStr(consumer_desc, ATTR_NAME_BATCH_LABEL, label);
    if (label.empty()) {
      label = kDefaultBatchLable;
    }

    const auto op_id = consumer_desc->GetId();
    if (zero_copy_op_id_batch_label_.find(op_id) != zero_copy_op_id_batch_label_.end()) {
      continue;  // First recorded label wins.
    }
    zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_id, label));
    GELOGD("Init input zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
           consumer_desc->GetName().c_str(), op_id, label.c_str());
  }
  return SUCCESS;
}
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | ||||
bool getnext_sink_dynamic = false; | bool getnext_sink_dynamic = false; | ||||
if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { | if (ge::AttrUtils::GetBool(op_desc, ATTR_GETNEXT_SINK_DYNMAIC, getnext_sink_dynamic) && getnext_sink_dynamic) { | ||||
@@ -1094,7 +1058,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
if (owner_graph->GetParentGraph() != nullptr) { | if (owner_graph->GetParentGraph() != nullptr) { | ||||
GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | GELOGI("Init zero copy by subgraph NetOutput node: %s.", op_desc->GetName().c_str()); | ||||
op_list_.erase(op_desc->GetId()); | op_list_.erase(op_desc->GetId()); | ||||
return InitOutputBatchLabel(node); | |||||
return SUCCESS; | |||||
} | } | ||||
output_op_list_.push_back(op_desc); | output_op_list_.push_back(op_desc); | ||||
@@ -1146,8 +1110,6 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) { | |||||
} | } | ||||
} | } | ||||
GE_IF_BOOL_EXEC(InitOutputZeroCopyNodes(node) != SUCCESS, | |||||
GELOGE(PARAM_INVALID, "Output zero copy nodes init failed!"); return PARAM_INVALID;); | |||||
GetAllGearsInfo(node); | GetAllGearsInfo(node); | ||||
if (is_getnext_sink_dynamic_) { | if (is_getnext_sink_dynamic_) { | ||||
GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, | GE_IF_BOOL_EXEC(GetGetDynamicDimsNodeInfo(node) != SUCCESS, | ||||
@@ -1343,121 +1305,6 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info | |||||
} | } | ||||
} | } | ||||
///
/// @ingroup ge
/// @brief Record the batch label of the ops related to the inputs of a NetOutput op.
///        For each connected input the producing node is collected. If the producer is a Merge
///        node, the nodes feeding that Merge are collected as well; otherwise every other
///        non-NetOutput consumer of the same producer output is collected.
///        Ops without a batch label attribute are recorded with kDefaultBatchLable, and an op id
///        that is already recorded keeps its existing label.
/// @param [in] node: netoutput Op.
/// @return Status
///
Status DavinciModel::InitOutputZeroCopyNodes(const NodePtr &node) {
  set<NodePtr> record_nodes;
  for (auto &in_anchor : node->GetAllInDataAnchors()) {
    auto producer_anchor = in_anchor->GetPeerOutAnchor();
    if (producer_anchor == nullptr) {
      continue;  // Unconnected input.
    }

    auto producer = producer_anchor->GetOwnerNode();
    record_nodes.emplace(producer);

    if (producer->GetType() != MERGE) {
      // Other readers of the same output (except NetOutput itself) are recorded too.
      for (const auto &sibling_anchor : producer_anchor->GetPeerInDataAnchors()) {
        auto sibling = sibling_anchor->GetOwnerNode();
        if (sibling->GetType() != NETOUTPUT) {
          record_nodes.emplace(sibling);
        }
      }
      continue;
    }

    // Merge node output multiplexed input, upstream nodes need to be considered in multiple batch scenarios
    for (const auto &merge_in_anchor : producer->GetAllInDataAnchors()) {
      auto upstream_anchor = merge_in_anchor->GetPeerOutAnchor();
      if (upstream_anchor == nullptr) {
        continue;
      }
      record_nodes.emplace(upstream_anchor->GetOwnerNode());
    }
  }

  for (const auto &record_node : record_nodes) {
    auto op_desc = record_node->GetOpDesc();
    GE_CHECK_NOTNULL(op_desc);

    // A missing or empty attribute falls back to the default label.
    string label;
    (void)ge::AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, label);
    if (label.empty()) {
      label = kDefaultBatchLable;
    }

    const auto op_id = op_desc->GetId();
    if (zero_copy_op_id_batch_label_.find(op_id) != zero_copy_op_id_batch_label_.end()) {
      continue;  // First recorded label wins.
    }
    zero_copy_op_id_batch_label_.emplace(pair<int64_t, string>(op_id, label));
    GELOGD("Init Output zero copy nodes success, op name:%s, op id: %ld, batch label: %s.",
           op_desc->GetName().c_str(), op_id, label.c_str());
  }
  return SUCCESS;
}
///
/// @ingroup ge
/// @brief Propagate the batch label of a Data op to every op consuming its output.
///        Does nothing when the Data op carries no batch label attribute. An op id that is
///        already recorded keeps its existing label.
/// @param [in] node: Data Op.
/// @return Status
///
Status DavinciModel::InitInputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  const auto &data_out_anchor = node->GetOutDataAnchor(kDataIndex);
  GE_CHECK_NOTNULL(data_out_anchor);

  for (const auto &consumer_anchor : data_out_anchor->GetPeerInDataAnchors()) {
    const auto &consumer = consumer_anchor->GetOwnerNode();
    const auto &consumer_desc = consumer->GetOpDesc();
    GE_CHECK_NOTNULL(consumer_desc);

    const auto op_id = consumer_desc->GetId();
    if (zero_copy_op_id_batch_label_.find(op_id) != zero_copy_op_id_batch_label_.end()) {
      continue;  // First recorded label wins.
    }
    zero_copy_op_id_batch_label_[op_id] = batch_label;
    GELOGD("Init input zero copy nodes success, op name: %s, op id: %ld, batch label: %s",
           consumer_desc->GetName().c_str(), op_id, batch_label.c_str());
  }
  return SUCCESS;
}
///
/// @ingroup ge
/// @brief Propagate the batch label of a NetOutput op to every op producing one of its inputs.
///        Does nothing when the NetOutput op carries no batch label attribute. An op id that is
///        already recorded keeps its existing label.
/// @param [in] node: netoutput Op.
/// @return Status
///
Status DavinciModel::InitOutputBatchLabel(const NodePtr &node) {
  string batch_label;
  const bool has_label = AttrUtils::GetStr(node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label);
  if (!has_label) {
    return SUCCESS;  // Not Multi-batch.
  }

  for (const auto &in_anchor : node->GetAllInDataAnchors()) {
    const auto &producer_anchor = in_anchor->GetPeerOutAnchor();
    if (producer_anchor == nullptr) {
      continue;  // Unconnected input.
    }

    const auto &producer = producer_anchor->GetOwnerNode();
    const auto &producer_desc = producer->GetOpDesc();
    GE_CHECK_NOTNULL(producer_desc);

    const auto op_id = producer_desc->GetId();
    if (zero_copy_op_id_batch_label_.find(op_id) != zero_copy_op_id_batch_label_.end()) {
      continue;  // First recorded label wins.
    }
    zero_copy_op_id_batch_label_[op_id] = batch_label;
    GELOGD("Init Output zero copy nodes success, op name: %s, op id: %ld, batch label: %s",
           producer_desc->GetName().c_str(), op_id, batch_label.c_str());
  }
  return SUCCESS;
}
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief LabelSet Op Initialize. | /// @brief LabelSet Op Initialize. | ||||
/// @param [in] op_desc: LabelSet Op descriptor. | /// @param [in] op_desc: LabelSet Op descriptor. | ||||
@@ -2240,12 +2087,61 @@ Status DavinciModel::SyncVarData() { | |||||
return ret; | return ret; | ||||
} | } | ||||
inline int64_t SumSize(const vector<int64_t> &size_list) { | |||||
int64_t sum_size = 0; | |||||
for (const int64_t &size : size_list) { | |||||
sum_size += size; | |||||
Status DavinciModel::InitModelProfile() { | |||||
for (const auto &task : task_list_) { | |||||
GE_CHECK_NOTNULL(task); | |||||
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); | |||||
// when type is RT_MODEL_TASK_KERNEL, ctx is not null | |||||
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { | |||||
continue; | |||||
} | |||||
GELOGI("task.id = %u, opNum = %zu", task->GetTaskID(), fusion_op_info->original_op_names.size()); | |||||
op_id_map_.insert(std::make_pair(fusion_op_info->op_index, task->GetTaskID())); | |||||
} | |||||
std::set<uint32_t> task_id_set; | |||||
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | |||||
using Range = std::pair<CIT, CIT>; | |||||
for (const auto &task : task_list_) { | |||||
GE_CHECK_NOTNULL(task); | |||||
const FusionOpInfo *fusion_op_info = task->GetFusionOpInfo(); | |||||
if ((fusion_op_info == nullptr) || fusion_op_info->original_op_names.empty()) { | |||||
continue; | |||||
} | |||||
if (task_id_set.count(task->GetTaskID()) > 0) { | |||||
continue; | |||||
} | |||||
const auto &op_desc = GetOpByIndex(fusion_op_info->op_index); | |||||
GE_CHK_BOOL_EXEC(op_desc != nullptr, return FAILED, "index: %u out of range", fusion_op_info->op_index); | |||||
ProfileInfo profile; | |||||
profile.fusion_info = *fusion_op_info; | |||||
Range range = op_id_map_.equal_range(fusion_op_info->op_index); | |||||
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { | |||||
profile.task_count++; | |||||
task_id_set.insert(range_idx->second); | |||||
} | |||||
// memory info | |||||
TaskMemInfo &mem_info = profile.memory_info; | |||||
const auto input_size = ModelUtils::GetInputSize(op_desc); | |||||
const auto output_size = ModelUtils::GetOutputSize(op_desc); | |||||
const auto workspace_size = ModelUtils::GetWorkspaceSize(op_desc); | |||||
const auto weight_size = ModelUtils::GetWeightSize(op_desc); | |||||
mem_info.input_size = std::accumulate(input_size.begin(), input_size.end(), 0); | |||||
mem_info.output_size = std::accumulate(output_size.begin(), output_size.end(), 0); | |||||
mem_info.workspace_size = std::accumulate(workspace_size.begin(), workspace_size.end(), 0); | |||||
mem_info.weight_size = std::accumulate(weight_size.begin(), weight_size.end(), 0); | |||||
mem_info.total_size = mem_info.weight_size + mem_info.input_size + mem_info.output_size + mem_info.workspace_size; | |||||
profile_list_.emplace_back(profile); | |||||
} | } | ||||
return sum_size; | |||||
GELOGI("fusion task size: %zu, profile info size: %zu", op_id_map_.size(), profile_list_.size()); | |||||
return SUCCESS; | |||||
} | } | ||||
Status DavinciModel::SinkModelProfile() { | Status DavinciModel::SinkModelProfile() { | ||||
@@ -2253,18 +2149,12 @@ Status DavinciModel::SinkModelProfile() { | |||||
auto &prof_mgr = ProfilingManager::Instance(); | auto &prof_mgr = ProfilingManager::Instance(); | ||||
ReporterData reporter_data{}; | ReporterData reporter_data{}; | ||||
// report model data tag name | // report model data tag name | ||||
std::string tag_name; | |||||
tag_name.append("model_load_info_").append(std::to_string(this->Id())); | |||||
std::string tag_name("model_load_info_" + std::to_string(this->Id())); | |||||
GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | GE_CHK_BOOL_EXEC(memcpy_s(reporter_data.tag, MSPROF_ENGINE_MAX_TAG_LEN, tag_name.c_str(), tag_name.size()) == EOK, | ||||
return FAILED, "Sink model tag memcpy error."); | return FAILED, "Sink model tag memcpy error."); | ||||
// Model Header | // Model Header | ||||
string name; | |||||
if (!om_name_.empty()) { | |||||
name = om_name_; | |||||
} else { | |||||
name = name_; | |||||
} | |||||
std::string name = om_name_.empty() ? name_ : om_name_; | |||||
size_t name_len = name.size(); | size_t name_len = name.size(); | ||||
reporter_data.deviceId = device_id_; | reporter_data.deviceId = device_id_; | ||||
reporter_data.data = (unsigned char *)&name_len; | reporter_data.data = (unsigned char *)&name_len; | ||||
@@ -2296,128 +2186,71 @@ Status DavinciModel::SinkModelProfile() { | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | ||||
"Reporter data fail, model id:%u.", this->Id()); | "Reporter data fail, model id:%u.", this->Id()); | ||||
int32_t task_num = task_list_.size(); | |||||
std::multimap<uint32_t, uint32_t> op_id_map; | |||||
std::set<uint32_t> task_id_set; | |||||
for (int32_t i = 0; i < task_num; i++) { | |||||
auto task = task_list_[i]; | |||||
GE_CHECK_NOTNULL(task); | |||||
auto fusion_op_info = task->GetFusionOpInfo(); | |||||
// when type is RT_MODEL_TASK_KERNEL, ctx is not null | |||||
if (fusion_op_info != nullptr) { | |||||
uint32_t op_num = fusion_op_info->original_op_names.size(); | |||||
uint32_t task_id = task->GetTaskID(); | |||||
if (op_num > 0) { | |||||
GELOGI("task.id = %u, opNum = %u", task_id, op_num); | |||||
op_id_map.insert(std::make_pair(fusion_op_info->op_index, task_id)); | |||||
} | |||||
} | |||||
} | |||||
struct memoryInfo { | |||||
int64_t input_size; | |||||
int64_t output_size; | |||||
int64_t weight_size; | |||||
int64_t workspace_size; | |||||
int64_t total_size; | |||||
memoryInfo() : input_size(0), output_size(0), weight_size(0), workspace_size(0), total_size(0) {} | |||||
}; | |||||
using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | using CIT = std::multimap<uint32_t, uint32_t>::const_iterator; | ||||
using Range = std::pair<CIT, CIT>; | using Range = std::pair<CIT, CIT>; | ||||
for (int32_t i = 0; i < task_num; i++) { | |||||
auto task = task_list_[i]; | |||||
GE_CHECK_NOTNULL(task); | |||||
auto fusion_op_info = task->GetFusionOpInfo(); | |||||
if (fusion_op_info != nullptr && fusion_op_info->original_op_names.size() > 0) { | |||||
uint32_t task_id = task->GetTaskID(); | |||||
uint32_t op_num = fusion_op_info->original_op_names.size(); | |||||
uint32_t task_count = 0; | |||||
if (task_id_set.count(task_id) != 0) { | |||||
continue; | |||||
} | |||||
uint32_t op_id = fusion_op_info->op_index; | |||||
Range range = op_id_map.equal_range(op_id); | |||||
for (CIT range_idx = range.first; range_idx != range.second; ++range_idx) { | |||||
task_count++; | |||||
uint32_t task_id = range_idx->second; | |||||
task_id_set.insert(task_id); | |||||
} | |||||
// op name after fusion | |||||
string fusion_op_name = fusion_op_info->op_name; | |||||
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||||
reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||||
for (const ProfileInfo &profile : profile_list_) { | |||||
// op name after fusion | |||||
string fusion_op_name = profile.fusion_info.op_name; | |||||
int32_t fusion_op_name_len = fusion_op_name.size() == 0 ? 1 : fusion_op_name.size(); | |||||
reporter_data.data = (unsigned char *)&fusion_op_name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||||
reporter_data.dataLen = fusion_op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// original op name before fusion | |||||
uint32_t op_num = profile.fusion_info.original_op_names.size(); | |||||
reporter_data.data = (unsigned char *)&op_num; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
for (uint32_t k = 0; k < op_num; k++) { | |||||
std::string op_name = profile.fusion_info.original_op_names[k]; | |||||
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||||
reporter_data.data = (unsigned char *)&op_name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | reporter_data.dataLen = sizeof(int32_t); | ||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | ||||
"Reporter data fail, model id:%u.", this->Id()); | "Reporter data fail, model id:%u.", this->Id()); | ||||
reporter_data.data = (unsigned char *)fusion_op_name.c_str(); | |||||
reporter_data.dataLen = fusion_op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// original op name before fusion | |||||
reporter_data.data = (unsigned char *)&op_num; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
for (uint32_t k = 0; k < op_num; k++) { | |||||
std::string op_name = fusion_op_info->original_op_names[k]; | |||||
int32_t op_name_len = op_name.size() == 0 ? 1 : op_name.size(); | |||||
reporter_data.data = (unsigned char *)&op_name_len; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
reporter_data.data = (unsigned char *)op_name.c_str(); | |||||
reporter_data.dataLen = op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
} | |||||
// stream id info | |||||
uint32_t streamId = task->GetStreamId(); | |||||
reporter_data.data = (unsigned char *)&streamId; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// memory info | |||||
struct memoryInfo memory_info; | |||||
uint32_t op_index = fusion_op_info->op_index; | |||||
auto iter = op_list_.find(op_index); | |||||
GE_CHK_BOOL_EXEC(iter != op_list_.end(), return FAILED, "index is out of range, index: %u", op_index); | |||||
auto op_desc = iter->second; | |||||
memory_info.input_size = SumSize(ModelUtils::GetInputSize(op_desc)); | |||||
memory_info.output_size = SumSize(ModelUtils::GetOutputSize(op_desc)); | |||||
memory_info.workspace_size = SumSize(ModelUtils::GetWorkspaceSize(op_desc)); | |||||
memory_info.weight_size = SumSize(ModelUtils::GetWeightSize(op_desc)); | |||||
memory_info.total_size = | |||||
memory_info.weight_size + memory_info.input_size + memory_info.output_size + memory_info.workspace_size; | |||||
reporter_data.data = (unsigned char *)&memory_info; | |||||
reporter_data.dataLen = sizeof(struct memoryInfo); | |||||
reporter_data.data = (unsigned char *)op_name.c_str(); | |||||
reporter_data.dataLen = op_name_len; | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | ||||
"Reporter data fail, model id:%u.", this->Id()); | "Reporter data fail, model id:%u.", this->Id()); | ||||
} | |||||
// task info | |||||
reporter_data.data = (unsigned char *)&task_count; | |||||
// stream id info | |||||
uint32_t streamId = profile.fusion_info.stream_id; | |||||
reporter_data.data = (unsigned char *)&streamId; | |||||
reporter_data.dataLen = sizeof(int32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// memory info | |||||
reporter_data.data = (unsigned char *)&profile.memory_info; | |||||
reporter_data.dataLen = sizeof(profile.memory_info); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
// task info | |||||
reporter_data.data = (unsigned char *)&profile.task_count; | |||||
reporter_data.dataLen = sizeof(uint32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
Range task_range = op_id_map_.equal_range(profile.fusion_info.op_index); | |||||
for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||||
uint32_t task_id = idx->second; | |||||
reporter_data.data = (unsigned char *)&task_id; | |||||
reporter_data.dataLen = sizeof(uint32_t); | reporter_data.dataLen = sizeof(uint32_t); | ||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | ||||
"Reporter data fail, model id:%u.", this->Id()); | "Reporter data fail, model id:%u.", this->Id()); | ||||
Range task_range = op_id_map.equal_range(op_id); | |||||
for (CIT idx = task_range.first; idx != task_range.second; ++idx) { | |||||
uint32_t task_id = idx->second; | |||||
reporter_data.data = (unsigned char *)&task_id; | |||||
reporter_data.dataLen = sizeof(uint32_t); | |||||
GE_CHK_BOOL_EXEC(prof_mgr.CallMsprofReport(reporter_data) == 0, return FAILED, | |||||
"Reporter data fail, model id:%u.", this->Id()); | |||||
} | |||||
} | } | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2991,19 +2824,19 @@ Status DavinciModel::CreateKnownZeroCopyMap(const vector<void *> &inputs, const | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status DavinciModel::UpdateKnownZeroCopyAddr() { | |||||
for (size_t i = 0; i < total_io_addrs_.size(); ++i) { | |||||
auto it_in = knonw_input_data_info_.find(total_io_addrs_[i]); | |||||
Status DavinciModel::UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs) { | |||||
for (size_t i = 0; i < total_io_addrs.size(); ++i) { | |||||
auto it_in = knonw_input_data_info_.find(total_io_addrs[i]); | |||||
if (it_in != knonw_input_data_info_.end()) { | if (it_in != knonw_input_data_info_.end()) { | ||||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
knonw_input_data_info_.at(total_io_addrs_[i])); | |||||
total_io_addrs_[i] = knonw_input_data_info_.at(total_io_addrs_[i]); | |||||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr input %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
knonw_input_data_info_.at(total_io_addrs[i])); | |||||
total_io_addrs[i] = knonw_input_data_info_.at(total_io_addrs[i]); | |||||
} | } | ||||
auto it_out = knonw_output_data_info_.find(total_io_addrs_[i]); | |||||
auto it_out = knonw_output_data_info_.find(total_io_addrs[i]); | |||||
if (it_out != knonw_output_data_info_.end()) { | if (it_out != knonw_output_data_info_.end()) { | ||||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs_[i], | |||||
knonw_output_data_info_.at(total_io_addrs_[i])); | |||||
total_io_addrs_[i] = knonw_output_data_info_.at(total_io_addrs_[i]); | |||||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr output %zu,v addr %p,p addr %p .", i, total_io_addrs[i], | |||||
knonw_output_data_info_.at(total_io_addrs[i])); | |||||
total_io_addrs[i] = knonw_output_data_info_.at(total_io_addrs[i]); | |||||
} | } | ||||
} | } | ||||
GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | GELOGI("DavinciModel::UpdateKnownZeroCopyAddr success."); | ||||
@@ -3032,7 +2865,7 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec | |||||
} else { | } else { | ||||
total_io_addrs_ = orig_total_io_addrs_; | total_io_addrs_ = orig_total_io_addrs_; | ||||
} | } | ||||
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
GE_CHK_STATUS_RET(UpdateKnownZeroCopyAddr(total_io_addrs_), "DavinciModel::UpdateKnownZeroCopyAddr failed."); | |||||
if (total_args_size_ == 0) { | if (total_args_size_ == 0) { | ||||
GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | GELOGW("DavinciModel::UpdateKnownNodeArgs device args %p, dst size %u, pass rtMemcpy.", args_, total_args_size_); | ||||
@@ -3099,7 +2932,14 @@ Status DavinciModel::MallocKnownArgs() { | |||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
} | } | ||||
// malloc dynamic and static hybrid memory | |||||
if (total_hybrid_args_size_ != 0) { | |||||
rt_ret = rtMalloc(&hybrid_addrs_, total_hybrid_args_size_, RT_MEMORY_HBM); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rtMalloc failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | |||||
} | |||||
// malloc fixed addr memory, eg: rts op | // malloc fixed addr memory, eg: rts op | ||||
if (total_fixed_addr_size_ != 0) { | if (total_fixed_addr_size_ != 0) { | ||||
GELOGI("Begin to allocate fixed addr."); | GELOGI("Begin to allocate fixed addr."); | ||||
@@ -3257,27 +3097,20 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||||
for (auto &input_outside_addrs : new_input_outside_addrs_) { | for (auto &input_outside_addrs : new_input_outside_addrs_) { | ||||
ZeroCopyOffset &input_outside = input_outside_addrs.second; | ZeroCopyOffset &input_outside = input_outside_addrs.second; | ||||
bool ret = input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||||
if (ret) { | |||||
void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen; | |||||
SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val)); | |||||
} | |||||
input_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||||
} | } | ||||
for (auto &output_outside_addrs : new_output_outside_addrs_) { | for (auto &output_outside_addrs : new_output_outside_addrs_) { | ||||
ZeroCopyOffset &output_outside = output_outside_addrs.second; | ZeroCopyOffset &output_outside = output_outside_addrs.second; | ||||
bool ret = output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||||
if (ret) { | |||||
void *args_val = static_cast<uint8_t *>(args) + offset + i * kAddrLen; | |||||
SetBatchLabelAddr(op_desc, reinterpret_cast<uintptr_t>(args_val)); | |||||
} | |||||
output_outside.SetOutsideAddrsValue(zero_copy_task, outside_addrs[i], args, offset + i * kAddrLen); | |||||
} | } | ||||
} | } | ||||
auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); | |||||
if (it == zero_copy_op_id_batch_label_.end()) { | |||||
string batch_label; | |||||
if (!AttrUtils::GetStr(op_desc, ATTR_NAME_BATCH_LABEL, batch_label) || batch_label.empty()) { | |||||
zero_copy_task.SetBatchLabel(kDefaultBatchLable); | zero_copy_task.SetBatchLabel(kDefaultBatchLable); | ||||
} else { | } else { | ||||
zero_copy_task.SetBatchLabel(it->second); | |||||
zero_copy_task.SetBatchLabel(batch_label); | |||||
} | } | ||||
std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | std::lock_guard<std::mutex> lock(outside_addrs_mutex_); | ||||
@@ -3287,27 +3120,6 @@ void DavinciModel::SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<v | |||||
} | } | ||||
} | } | ||||
void DavinciModel::SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr) { | |||||
// Establish a mapping between batch label and zero copy address for multi-batch scenes | |||||
auto it = zero_copy_op_id_batch_label_.find(op_desc->GetId()); | |||||
if (it == zero_copy_op_id_batch_label_.end()) { | |||||
return; | |||||
} | |||||
const string &batch_label = it->second; | |||||
auto iter = zero_copy_batch_label_addrs_.find(batch_label); | |||||
if (iter != zero_copy_batch_label_addrs_.end()) { | |||||
iter->second.insert(addr); | |||||
GELOGD("[ZCPY] Set zero copy batch label and addrs success, batch label: %s, op name:%s.", batch_label.c_str(), | |||||
op_desc->GetName().c_str()); | |||||
} else { | |||||
set<uintptr_t> addrs = {addr}; | |||||
zero_copy_batch_label_addrs_.emplace(pair<string, set<uintptr_t>>(batch_label, addrs)); | |||||
GELOGD("[ZCPY] New added zero copy batch label and addrs success, batch label: %s, op name:%s.", | |||||
batch_label.c_str(), op_desc->GetName().c_str()); | |||||
} | |||||
} | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Copy Check input size and model op size. | /// @brief Copy Check input size and model op size. | ||||
@@ -3441,15 +3253,15 @@ Status DavinciModel::UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> & | |||||
void *addr = data.second.GetDataInfo().at(count).second; | void *addr = data.second.GetDataInfo().at(count).second; | ||||
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) + | void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data) + | ||||
data.second.GetRelativeOffset().at(count)); | data.second.GetRelativeOffset().at(count)); | ||||
GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p", input_or_output.c_str(), | |||||
data.first, addr, size, buffer_addr); | |||||
GELOGI("[ZCPY] Copy %s blobs_index %u, virtual_addr: %p, size: %ld, user_data_addr: %p, batch_label: %s", | |||||
input_or_output.c_str(), data.first, addr, size, buffer_addr, batch_label.c_str()); | |||||
// For input data, just copy for rts task. | // For input data, just copy for rts task. | ||||
for (ZeroCopyTask &task : zero_copy_tasks_) { | for (ZeroCopyTask &task : zero_copy_tasks_) { | ||||
if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { | if (task.GetBatchLabel() != kDefaultBatchLable && task.GetBatchLabel() != batch_label) { | ||||
continue; | continue; | ||||
} | } | ||||
uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr); | uintptr_t addr_val = reinterpret_cast<uintptr_t>(addr); | ||||
if (task.UpdateTaskParam(addr_val, buffer_addr, zero_copy_batch_label_addrs_, batch_label) != SUCCESS) { | |||||
if (task.UpdateTaskParam(addr_val, buffer_addr) != SUCCESS) { | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
} | } | ||||
@@ -3811,9 +3623,6 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa | |||||
GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | GELOGD("Model Run begin, model id:%u, data index:%u, flag:%d.", model_id_, input_data.index, is_async_mode_); | ||||
GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed."); | ||||
is_dynamic_ = input_data.is_dynamic_batch; | is_dynamic_ = input_data.is_dynamic_batch; | ||||
if (!is_dynamic_) { | |||||
zero_copy_batch_label_addrs_.clear(); | |||||
} | |||||
GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); | GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START)); | ||||
Status ret = CopyModelData(input_data, output_data, is_dynamic_); | Status ret = CopyModelData(input_data, output_data, is_dynamic_); | ||||
@@ -76,6 +76,20 @@ struct timeInfo { | |||||
int64_t dumpEndTime; | int64_t dumpEndTime; | ||||
}; | }; | ||||
struct TaskMemInfo { | |||||
int64_t input_size{0}; | |||||
int64_t output_size{0}; | |||||
int64_t weight_size{0}; | |||||
int64_t workspace_size{0}; | |||||
int64_t total_size{0}; | |||||
}; | |||||
struct ProfileInfo { | |||||
FusionOpInfo fusion_info; | |||||
TaskMemInfo memory_info; | |||||
uint32_t task_count{0}; | |||||
}; | |||||
enum ExecuteMode { | enum ExecuteMode { | ||||
INITIALIZATION, | INITIALIZATION, | ||||
SYNCHRONIZATION, | SYNCHRONIZATION, | ||||
@@ -226,8 +240,6 @@ class DavinciModel { | |||||
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; } | const vector<OpDescPtr> &GetDataList() const { return data_op_list_; } | ||||
// get Op | // get Op | ||||
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; } | |||||
OpDescPtr GetOpByIndex(uint32_t index) const { | OpDescPtr GetOpByIndex(uint32_t index) const { | ||||
if (op_list_.find(index) == op_list_.end()) { | if (op_list_.find(index) == op_list_.end()) { | ||||
return nullptr; | return nullptr; | ||||
@@ -436,10 +448,6 @@ class DavinciModel { | |||||
int64_t GetLoadEndTime() { return load_end_time_; } | int64_t GetLoadEndTime() { return load_end_time_; } | ||||
Status SinkModelProfile(); | |||||
Status SinkTimeProfile(const InputData &current_data); | |||||
Status ReportProfilingData(); | Status ReportProfilingData(); | ||||
void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) { | ||||
@@ -476,6 +484,14 @@ class DavinciModel { | |||||
void SetTotalIOAddrs(vector<void *> &io_addrs) { | void SetTotalIOAddrs(vector<void *> &io_addrs) { | ||||
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end()); | ||||
} | } | ||||
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; } | |||||
uint32_t GetHybridArgsSize() { | |||||
return total_hybrid_args_size_; | |||||
} | |||||
void *GetCurrentHybridArgsAddr(uint32_t offset) { | |||||
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset; | |||||
return cur_args; | |||||
} | |||||
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size); | ||||
int64_t GetFixedAddrsSize(string tensor_name); | int64_t GetFixedAddrsSize(string tensor_name); | ||||
void *GetCurrentFixedAddr(int64_t offset) const { | void *GetCurrentFixedAddr(int64_t offset) const { | ||||
@@ -494,7 +510,7 @@ class DavinciModel { | |||||
Status MallocKnownArgs(); | Status MallocKnownArgs(); | ||||
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs); | ||||
Status UpdateKnownZeroCopyAddr(); | |||||
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs); | |||||
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; } | ||||
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info); | ||||
@@ -531,15 +547,6 @@ class DavinciModel { | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Save Batch label Info. | |||||
/// @param [in] const OpDescPtr &op_desc | |||||
/// @param [in] uintptr_t addr: address value in args block. | |||||
/// @return None. | |||||
/// | |||||
void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Copy Check input size and model op size. | /// @brief Copy Check input size and model op size. | ||||
/// @param [in] const int64_t &input_size: input size. | /// @param [in] const int64_t &input_size: input size. | ||||
/// @param [in] const int64_t &op_size: model op size. | /// @param [in] const int64_t &op_size: model op size. | ||||
@@ -651,14 +658,6 @@ class DavinciModel { | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief input zero copy node Initialize. | |||||
/// @param [in] NodePtr: Data Op. | |||||
/// @return Status | |||||
/// | |||||
Status InitInputZeroCopyNodes(const NodePtr &node); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief NetOutput Op Initialize. | /// @brief NetOutput Op Initialize. | ||||
/// @param [in] NodePtr: NetOutput Op. | /// @param [in] NodePtr: NetOutput Op. | ||||
/// @return Status | /// @return Status | ||||
@@ -667,30 +666,6 @@ class DavinciModel { | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief output zero copy node Initialize. | |||||
/// @param [in] NodePtr: Data Op. | |||||
/// @return Status | |||||
/// | |||||
Status InitOutputZeroCopyNodes(const NodePtr &node); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief input zero copy node Initialize for Case. | |||||
/// @param [in] NodePtr: Data Op. | |||||
/// @return Status | |||||
/// | |||||
Status InitInputBatchLabel(const NodePtr &node); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief output zero copy node Initialize for Case. | |||||
/// @param [in] NodePtr: netoutput Op. | |||||
/// @return Status | |||||
/// | |||||
Status InitOutputBatchLabel(const NodePtr &node); | |||||
/// | |||||
/// @ingroup ge | |||||
/// @brief Constant Op Init. | /// @brief Constant Op Init. | ||||
/// @return Status | /// @return Status | ||||
/// | /// | ||||
@@ -837,6 +812,11 @@ class DavinciModel { | |||||
void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | void SetDataDumperArgs(const ComputeGraphPtr &compute_graph); | ||||
Status InitModelProfile(); | |||||
Status SinkModelProfile(); | |||||
Status SinkTimeProfile(const InputData &current_data); | |||||
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data, | ||||
std::vector<ge::OutputTensorInfo> &outputs); | std::vector<ge::OutputTensorInfo> &outputs); | ||||
@@ -914,11 +894,6 @@ class DavinciModel { | |||||
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr. | ||||
std::set<const void *> copy_only_addrs_; // Address need copy to original place. | std::set<const void *> copy_only_addrs_; // Address need copy to original place. | ||||
// {op_id, batch_label} | |||||
std::map<int64_t, std::string> zero_copy_op_id_batch_label_; | |||||
// {batch_label, addrs} | |||||
std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_; | |||||
std::vector<TaskInfoPtr> task_list_; | std::vector<TaskInfoPtr> task_list_; | ||||
// rt_moodel_handle | // rt_moodel_handle | ||||
rtModel_t rt_model_handle_; | rtModel_t rt_model_handle_; | ||||
@@ -977,6 +952,8 @@ class DavinciModel { | |||||
void *args_ = nullptr; | void *args_ = nullptr; | ||||
void *args_host_ = nullptr; | void *args_host_ = nullptr; | ||||
void *fixed_addrs_ = nullptr; | void *fixed_addrs_ = nullptr; | ||||
void *hybrid_addrs_ = nullptr; | |||||
uint32_t total_hybrid_args_size_ = 0; | |||||
int64_t total_fixed_addr_size_ = 0; | int64_t total_fixed_addr_size_ = 0; | ||||
std::map<const void *, void *> knonw_input_data_info_; | std::map<const void *, void *> knonw_input_data_info_; | ||||
std::map<const void *, void *> knonw_output_data_info_; | std::map<const void *, void *> knonw_output_data_info_; | ||||
@@ -1016,6 +993,9 @@ class DavinciModel { | |||||
// key: input_index: input is merge node; value: each gear info and each output shape | // key: input_index: input is merge node; value: each gear info and each output shape | ||||
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_; | ||||
std::vector<std::vector<int64_t>> all_gears_info_; | std::vector<std::vector<int64_t>> all_gears_info_; | ||||
std::multimap<uint32_t, uint32_t> op_id_map_; | |||||
std::vector<ProfileInfo> profile_list_; | |||||
}; | }; | ||||
} // namespace ge | } // namespace ge | ||||
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ | #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_ |
@@ -89,6 +89,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||||
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { | if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { | ||||
std::vector<uint64_t> v_aicpu_kernel; | std::vector<uint64_t> v_aicpu_kernel; | ||||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | ||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto iter = model_aicpu_kernel_.find(model_key); | auto iter = model_aicpu_kernel_.find(model_key); | ||||
if (iter != model_aicpu_kernel_.end()) { | if (iter != model_aicpu_kernel_.end()) { | ||||
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); | GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); | ||||
@@ -176,7 +177,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u | |||||
} | } | ||||
void ModelManager::DestroyAicpuSession(uint64_t session_id) { | void ModelManager::DestroyAicpuSession(uint64_t session_id) { | ||||
std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto it = sess_ids_.find(session_id); | auto it = sess_ids_.find(session_id); | ||||
if (it == sess_ids_.end()) { | if (it == sess_ids_.end()) { | ||||
GELOGI("The session: %lu not created.", session_id); | GELOGI("The session: %lu not created.", session_id); | ||||
@@ -205,7 +206,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) { | |||||
} | } | ||||
ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto hybrid_davinci_model = hybrid_model_map_.find(model_id); | auto hybrid_davinci_model = hybrid_model_map_.find(model_id); | ||||
if (hybrid_davinci_model != hybrid_model_map_.end()) { | if (hybrid_davinci_model != hybrid_model_map_.end()) { | ||||
uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); | uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); | ||||
@@ -215,8 +216,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
auto it = model_map_.find(model_id); | auto it = model_map_.find(model_id); | ||||
if (it == model_map_.end()) { | if (it == model_map_.end()) { | ||||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||||
return GE_EXEC_MODEL_ID_INVALID; | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); | |||||
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | |||||
} | } | ||||
uint64_t session_id = it->second->GetSessionId(); | uint64_t session_id = it->second->GetSessionId(); | ||||
DestroyAicpuSession(session_id); | DestroyAicpuSession(session_id); | ||||
@@ -225,7 +226,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { | |||||
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { | ||||
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | ||||
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | ||||
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); | ||||
@@ -238,7 +239,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_ | |||||
} | } | ||||
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
std::vector<uint64_t> v_aicpu_kernel; | std::vector<uint64_t> v_aicpu_kernel; | ||||
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); | ||||
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { | ||||
@@ -250,7 +251,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i | |||||
} | } | ||||
ModelManager::~ModelManager() { | ModelManager::~ModelManager() { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
model_map_.clear(); | model_map_.clear(); | ||||
model_aicpu_kernel_.clear(); | model_aicpu_kernel_.clear(); | ||||
cust_aicpu_so_.clear(); | cust_aicpu_so_.clear(); | ||||
@@ -358,18 +359,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge | |||||
void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) { | void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) { | ||||
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); | GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
model_map_[id] = davinci_model; | model_map_[id] = davinci_model; | ||||
} | } | ||||
void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) { | void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) { | ||||
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); | GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
hybrid_model_map_[id] = hybrid_model; | hybrid_model_map_[id] = hybrid_model; | ||||
} | } | ||||
Status ModelManager::DeleteModel(uint32_t id) { | Status ModelManager::DeleteModel(uint32_t id) { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto it = model_map_.find(id); | auto it = model_map_.find(id); | ||||
auto hybrid_model_it = hybrid_model_map_.find(id); | auto hybrid_model_it = hybrid_model_map_.find(id); | ||||
@@ -384,22 +385,22 @@ Status ModelManager::DeleteModel(uint32_t id) { | |||||
} else if (hybrid_model_it != hybrid_model_map_.end()) { | } else if (hybrid_model_it != hybrid_model_map_.end()) { | ||||
(void)hybrid_model_map_.erase(hybrid_model_it); | (void)hybrid_model_map_.erase(hybrid_model_it); | ||||
} else { | } else { | ||||
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||||
return GE_EXEC_MODEL_ID_INVALID; | |||||
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); | |||||
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID; | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) { | std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto it = model_map_.find(id); | auto it = model_map_.find(id); | ||||
return (it == model_map_.end()) ? nullptr : it->second; | return (it == model_map_.end()) ? nullptr : it->second; | ||||
} | } | ||||
std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) { | std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) { | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto it = hybrid_model_map_.find(id); | auto it = hybrid_model_map_.find(id); | ||||
return (it == hybrid_model_map_.end()) ? nullptr : it->second; | return (it == hybrid_model_map_.end()) ? nullptr : it->second; | ||||
@@ -902,7 +903,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu | |||||
} | } | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | ||||
davinci_model->SetModelDescVersion(new_model_desc); | davinci_model->SetModelDescVersion(new_model_desc); | ||||
@@ -970,8 +971,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, | |||||
} | } | ||||
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | |||||
GE_CHECK_NOTNULL(davinci_model); | |||||
auto davinci_model = GetModel(model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
"GetCurShape Failed, Invalid Model ID %u!", model_id); | |||||
davinci_model->GetCurShape(batch_info, dynamic_type); | davinci_model->GetCurShape(batch_info, dynamic_type); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -984,7 +986,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami | |||||
} | } | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHECK_NOTNULL(davinci_model); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
"GetModelAttr Failed, Invalid Model ID %u!", model_id); | |||||
davinci_model->GetModelAttr(dynamic_output_shape_info); | davinci_model->GetModelAttr(dynamic_output_shape_info); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -994,9 +997,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
std::vector<uint32_t> &inputFormats, | std::vector<uint32_t> &inputFormats, | ||||
std::vector<uint32_t> &outputFormats) { | std::vector<uint32_t> &outputFormats) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", | |||||
model_id); | |||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | |||||
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); | |||||
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); | ||||
} | } | ||||
@@ -1011,18 +1013,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, | |||||
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
"GetAIPPInfo failed, invalid model_id is %u.", | |||||
model_id); | |||||
"GetAIPPInfo failed, invalid model_id is %u.", model_id); | |||||
return davinci_model->GetAIPPInfo(index, aipp_info); | return davinci_model->GetAIPPInfo(index, aipp_info); | ||||
} | } | ||||
Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { | ||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
"GetAIPPInfo failed, invalid model_id is %u.", | |||||
model_id); | |||||
"GetAIPPInfo failed, invalid model_id is %u.", model_id); | |||||
return davinci_model->GetAippType(index, type, aipp_index); | return davinci_model->GetAippType(index, type, aipp_index); | ||||
} | } | ||||
@@ -1055,7 +1053,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
mmTimespec timespec = mmGetTickCount(); | mmTimespec timespec = mmGetTickCount(); | ||||
ModelHelper model_helper; | ModelHelper model_helper; | ||||
Status ret = model_helper.LoadModel(model); | |||||
Status ret = model_helper.LoadRootModel(model); | |||||
if (model_helper.GetModelType()) { | |||||
bool is_shape_unknown = false; | |||||
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown), | |||||
"CheckIsUnknownShape failed, model id:%u", model_id); | |||||
if (is_shape_unknown || GetContext().GetHostExecFlag()) { | |||||
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener); | |||||
} | |||||
} | |||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "load model failed."); | GELOGE(ret, "load model failed."); | ||||
return ret; | return ret; | ||||
@@ -1069,8 +1075,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); | GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); | ||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | return ACL_ERROR_GE_MEMORY_ALLOCATION; | ||||
} catch (...) { | } catch (...) { | ||||
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); | |||||
return INTERNAL_ERROR; | |||||
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise"); | |||||
return ACL_ERROR_GE_MEMORY_ALLOCATION; | |||||
} | } | ||||
ret = davinci_model->Assign(ge_model); | ret = davinci_model->Assign(ge_model); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -1082,7 +1088,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model | |||||
int32_t device_id = 0; | int32_t device_id = 0; | ||||
rtError_t rt_ret = rtGetDevice(&device_id); | rtError_t rt_ret = rtGetDevice(&device_id); | ||||
if (rt_ret != RT_ERROR_NONE || device_id < 0) { | if (rt_ret != RT_ERROR_NONE || device_id < 0) { | ||||
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||||
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | return RT_ERROR_TO_GE_STATUS(rt_ret); | ||||
} | } | ||||
davinci_model->SetDeviceId(device_id); | davinci_model->SetDeviceId(device_id); | ||||
@@ -1214,7 +1220,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id); | ||||
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, | ||||
"Invalid model id %u, check weather model has been loaded or not.", model_id); | |||||
"Invalid model id %u, check whether model has been loaded or not.", model_id); | |||||
if (davinci_model->NeedDestroyAicpuKernel()) { | if (davinci_model->NeedDestroyAicpuKernel()) { | ||||
GELOGI("Start to destroy specified aicpu kernel."); | GELOGI("Start to destroy specified aicpu kernel."); | ||||
@@ -1237,7 +1243,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy | |||||
} | } | ||||
Status ModelManager::CreateAicpuSession(uint64_t session_id) { | Status ModelManager::CreateAicpuSession(uint64_t session_id) { | ||||
std::lock_guard<std::mutex> lock(sess_ids_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
auto it = sess_ids_.find(session_id); | auto it = sess_ids_.find(session_id); | ||||
// never been created by any model | // never been created by any model | ||||
if (it == sess_ids_.end()) { | if (it == sess_ids_.end()) { | ||||
@@ -1456,8 +1462,7 @@ void ModelManager::GenModelId(uint32_t *id) { | |||||
if (id == nullptr) { | if (id == nullptr) { | ||||
return; | return; | ||||
} | } | ||||
std::lock_guard<std::mutex> lock(map_mutex_); | |||||
std::lock_guard<std::recursive_mutex> lock(map_mutex_); | |||||
*id = ++max_model_id_; | *id = ++max_model_id_; | ||||
} | } | ||||
@@ -353,8 +353,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager { | |||||
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_; | ||||
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_; | std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_; | ||||
uint32_t max_model_id_; | uint32_t max_model_id_; | ||||
std::mutex map_mutex_; | |||||
std::mutex sess_ids_mutex_; | |||||
std::recursive_mutex map_mutex_; | |||||
std::mutex session_id_create_mutex_; | std::mutex session_id_create_mutex_; | ||||
static::std::mutex exeception_infos_mutex_; | static::std::mutex exeception_infos_mutex_; | ||||
uint64_t session_id_bias_; | uint64_t session_id_bias_; | ||||
@@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci | |||||
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; | fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names; | ||||
fusion_op_info_.op_name = op_desc_->GetName()); | fusion_op_info_.op_name = op_desc_->GetName()); | ||||
string session_graph_model_id; | |||||
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||||
// get bin_file_key | |||||
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
// new aicpu kernel(rtCpuKernelLaunch) no need to check function | // new aicpu kernel(rtCpuKernelLaunch) no need to check function | ||||
if (kernel_type_ == ccKernelType::CCE_AI_CORE) { | if (kernel_type_ == ccKernelType::CCE_AI_CORE) { | ||||
rtError_t rt_ret; | |||||
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | |||||
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s", | ||||
kernel_def.stub_func().c_str()); | kernel_def.stub_func().c_str()); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | return RT_ERROR_TO_GE_STATUS(rt_ret);); | ||||
} else if (kernel_type_ == ccKernelType::TE) { | } else if (kernel_type_ == ccKernelType::TE) { | ||||
rtError_t rt_ret; | |||||
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | |||||
// get bin_file_key | |||||
string session_graph_model_id; | |||||
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id); | |||||
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id); | |||||
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_); | |||||
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, | ||||
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key); | ||||
return RT_ERROR_TO_GE_STATUS(rt_ret);); | return RT_ERROR_TO_GE_STATUS(rt_ret);); | ||||
@@ -372,7 +370,11 @@ Status KernelTaskInfo::SuperKernelDistribute() { | |||||
Status KernelTaskInfo::Distribute() { | Status KernelTaskInfo::Distribute() { | ||||
GELOGD("KernelTaskInfo Distribute Start."); | GELOGD("KernelTaskInfo Distribute Start."); | ||||
if (davinci_model_->IsKnownNode()) { | if (davinci_model_->IsKnownNode()) { | ||||
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
if (kernel_type_ == ccKernelType::TE) { | |||||
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_); | |||||
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_); | |||||
} | |||||
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_); | ||||
} | } | ||||
rtError_t rt_ret = RT_ERROR_NONE; | rtError_t rt_ret = RT_ERROR_NONE; | ||||
@@ -428,36 +430,31 @@ Status KernelTaskInfo::UpdateArgs() { | |||||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | ||||
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_); | ||||
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_); | ||||
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { | |||||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); | |||||
if (kernel_type_ == ccKernelType::TE) { | |||||
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_); | |||||
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | ||||
} else { | |||||
string peer_input_name; | |||||
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) { | |||||
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name); | |||||
if (output_index > output_data_addrs.size()) { | |||||
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.", | |||||
output_data_addrs.size(), output_index); | |||||
return FAILED; | |||||
} | |||||
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); | |||||
for (size_t i = 0; i < output_data_addrs.size(); ++i) { | |||||
if (i == output_index) { | |||||
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); | |||||
io_addrs.emplace_back(fixed_addr); | |||||
continue; | |||||
} | |||||
io_addrs.emplace_back(output_data_addrs[i]); | |||||
} | |||||
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); | |||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs); | |||||
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead); | |||||
auto addrs_size = sizeof(uint64_t) * io_addrs.size(); | |||||
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size); | |||||
if (sec_ret != EOK) { | |||||
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret); | |||||
return FAILED; | |||||
} | |||||
// copy args to device | |||||
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE); | |||||
if (rt_ret != RT_ERROR_NONE) { | |||||
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret); | |||||
return RT_ERROR_TO_GE_STATUS(rt_ret); | |||||
} | } | ||||
} | } | ||||
davinci_model_->SetTotalIOAddrs(io_addrs); | |||||
GELOGI("KernelTaskInfo::UpdateArgs success."); | GELOGI("KernelTaskInfo::UpdateArgs success."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -533,33 +530,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) { | |||||
} | } | ||||
Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { | ||||
domi::KernelDef kernel_def = task_def.kernel(); | |||||
uint32_t args_size = kernel_def.args_size(); | |||||
args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
davinci_model->SetTotalArgsSize(args_size); | |||||
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
// get opcontext stored in model | |||||
const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
const domi::KernelContext &context = kernel_def.context(); | const domi::KernelContext &context = kernel_def.context(); | ||||
// get opdesc | |||||
op_desc_ = davinci_model->GetOpByIndex(context.op_index()); | |||||
GE_CHECK_NOTNULL(op_desc_); | |||||
// alloc fixed addr | |||||
string peer_input_name; | |||||
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { | |||||
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); | |||||
if (output_index > op_desc_->GetOutputsSize()) { | |||||
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(), | |||||
output_index); | |||||
return FAILED; | |||||
} | |||||
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); | |||||
auto tensor_desc = op_desc_->GetOutputDesc(output_index); | |||||
int64_t tensor_size = 0; | |||||
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); | |||||
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); | |||||
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size, | |||||
fixed_addr_offset_); | |||||
kernel_type_ = static_cast<ccKernelType>(context.kernel_type()); | |||||
if (kernel_type_ == ccKernelType::TE) { | |||||
uint32_t args_size = kernel_def.args_size(); | |||||
args_offset_ = davinci_model->GetTotalArgsSize(); | |||||
davinci_model->SetTotalArgsSize(args_size); | |||||
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_); | |||||
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) { | |||||
hybrid_args_offset_ = davinci_model->GetHybridArgsSize(); | |||||
davinci_model->SetHybridArgsSize(kernel_def.args_size()); | |||||
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -888,7 +870,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
} | } | ||||
// copy args to new host memory | // copy args to new host memory | ||||
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]); | |||||
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]); | |||||
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_) | ||||
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_); | ||||
if (sec_ret != EOK) { | if (sec_ret != EOK) { | ||||
@@ -896,8 +878,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
if (init_ret != SUCCESS) { | |||||
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
return init_ret; | |||||
} | |||||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
if (davinci_model_->IsKnownNode()) { | |||||
return SUCCESS; | |||||
} | |||||
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam(); | |||||
vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc); | ||||
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc); | ||||
vector<void *> io_addrs; | vector<void *> io_addrs; | ||||
@@ -914,19 +911,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k | |||||
} | } | ||||
} | } | ||||
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get()); | |||||
const auto &ext_info = kernel_def.kernel_ext_info(); | |||||
auto init_ret = InitAicpuTaskExtInfo(ext_info); | |||||
if (init_ret != SUCCESS) { | |||||
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size()); | |||||
return init_ret; | |||||
} | |||||
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(), | |||||
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_); | |||||
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_); | |||||
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size()); | |||||
// malloc device memory for args | // malloc device memory for args | ||||
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM); | ||||
if (rt_ret != RT_ERROR_NONE) { | if (rt_ret != RT_ERROR_NONE) { | ||||
@@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo { | |||||
OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
DavinciModel *davinci_model_; | DavinciModel *davinci_model_; | ||||
uint32_t args_offset_ = 0; | uint32_t args_offset_ = 0; | ||||
uint32_t hybrid_args_offset_ = 0; | |||||
int64_t fixed_addr_offset_ = 0; | int64_t fixed_addr_offset_ = 0; | ||||
std::unique_ptr<uint8_t[]> args_addr = nullptr; | |||||
bool call_save_dump_ = false; | bool call_save_dump_ = false; | ||||
// aicpu ext_info device mem | // aicpu ext_info device mem | ||||
@@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo | |||||
addr_count_ = out_count; | addr_count_ = out_count; | ||||
} | } | ||||
bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | |||||
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { | |||||
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); | ||||
bool set_batch_label_flag = false; | |||||
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { | ||||
auto &addrs_mapping_list = GetOutsideAddrs(); | |||||
auto args_addrs = addrs_mapping_list[out_count].find(outside_addr); | |||||
if (args_addrs != addrs_mapping_list[out_count].end()) { | |||||
auto args_addrs = outside_addrs_[out_count].find(outside_addr); | |||||
if (args_addrs != outside_addrs_[out_count].end()) { | |||||
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid."); | ||||
void *args_val = static_cast<uint8_t *>(args) + offset; | void *args_val = static_cast<uint8_t *>(args) + offset; | ||||
args_addrs->second.push_back(args_val); | args_addrs->second.push_back(args_val); | ||||
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val, | ||||
args, offset); | args, offset); | ||||
set_batch_label_flag = true; | |||||
} | } | ||||
} | } | ||||
return set_batch_label_flag; | |||||
} | } | ||||
} // namespace ge | } // namespace ge |
@@ -51,7 +51,7 @@ class ZeroCopyOffset { | |||||
const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); | const OpDescPtr &op_desc, const size_t &idx, bool &fusion_flag); | ||||
void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | void SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, | ||||
std::vector<void *> &tensor_addrs); | std::vector<void *> &tensor_addrs); | ||||
bool SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); | |||||
void SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset); | |||||
// basic_addr of l2-fusion | // basic_addr of l2-fusion | ||||
void *GetBasicAddr() const { return basic_addr_; } | void *GetBasicAddr() const { return basic_addr_; } | ||||
@@ -22,8 +22,6 @@ | |||||
#include "common/ge_compiler_options.h" | #include "common/ge_compiler_options.h" | ||||
namespace ge { | namespace ge { | ||||
const char *const kDefaultBatchLable = "Batch_default"; | |||||
ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size) | ZeroCopyTask::ZeroCopyTask(const string &name, uint8_t *args, size_t size) | ||||
: name_(name), args_addr_(args), args_size_(size), is_updated_(false) {} | : name_(name), args_addr_(args), args_size_(size), is_updated_(false) {} | ||||
@@ -66,68 +64,23 @@ void ZeroCopyTask::SetOriginalArgs(const void *info, size_t size) { | |||||
const uint8_t *data = static_cast<const uint8_t *>(info); | const uint8_t *data = static_cast<const uint8_t *>(info); | ||||
args_info_.assign(data, data + size); | args_info_.assign(data, data + size); | ||||
GELOGI("[ZCPY] %s set info from virtual_addr: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, | |||||
GELOGI("[ZCPY] %s set original args info: %p, args_addr: %p, args size: %zu, info size: %zu", name_.c_str(), info, | |||||
args_addr_, args_size_, size); | args_addr_, args_size_, size); | ||||
} | } | ||||
/** | /** | ||||
* @ingroup ge | * @ingroup ge | ||||
* @brief Check is dynamic batch node. | |||||
* @param [in] addr: virtual address value from Op. | |||||
* @param [in] data: data buffer from user. | |||||
* @param [in] batch_addrs: dynamic batch addr info. | |||||
* @param [in] batch_label: batch label. | |||||
* @return: true / false | |||||
*/ | |||||
bool ZeroCopyTask::CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, | |||||
uintptr_t addr) { | |||||
// Used for dynamic batch / resolution scene | |||||
set<uintptr_t> dynamic_input_addrs; | |||||
auto dynamic_input_iter = batch_addrs.find(batch_label); | |||||
if (dynamic_input_iter != batch_addrs.end()) { | |||||
dynamic_input_addrs = dynamic_input_iter->second; | |||||
} | |||||
set<uintptr_t> fix_input_addrs; | |||||
auto fix_input_iter = batch_addrs.find(kDefaultBatchLable); | |||||
if (fix_input_iter != batch_addrs.end()) { | |||||
fix_input_addrs = fix_input_iter->second; | |||||
} | |||||
if (fix_input_addrs.empty()) { | |||||
if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end()) { | |||||
return false; | |||||
} | |||||
} else { | |||||
if (!dynamic_input_addrs.empty() && dynamic_input_addrs.find(addr) == dynamic_input_addrs.end() && | |||||
fix_input_addrs.find(addr) == fix_input_addrs.end()) { | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
/** | |||||
* @ingroup ge | |||||
* @brief Set user data addr to Task param. | * @brief Set user data addr to Task param. | ||||
* @param [in] addr: virtual address value from Op. | * @param [in] addr: virtual address value from Op. | ||||
* @param [in] buffer_addr: real_data_buffer_addr from user. | * @param [in] buffer_addr: real_data_buffer_addr from user. | ||||
* @param [in] batch_addrs: dynamic batch addr info. | |||||
* @param [in] batch_label: batch label. | |||||
* @return: void | * @return: void | ||||
*/ | */ | ||||
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | |||||
const string &batch_label) { | |||||
Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr) { | |||||
auto iter = task_addr_offset_.find(addr); | auto iter = task_addr_offset_.find(addr); | ||||
if (iter != task_addr_offset_.end()) { | if (iter != task_addr_offset_.end()) { | ||||
auto &cur_pair = *iter; | auto &cur_pair = *iter; | ||||
uint8_t *args_info = args_info_.data(); | uint8_t *args_info = args_info_.data(); | ||||
for (auto offset : cur_pair.second) { | for (auto offset : cur_pair.second) { | ||||
if (!CheckDynamicBatch(batch_addrs, batch_label, reinterpret_cast<uintptr_t>(args_addr_ + offset))) { | |||||
continue; | |||||
} | |||||
auto dst_addr = static_cast<uint8_t *>(buffer_addr); | auto dst_addr = static_cast<uint8_t *>(buffer_addr); | ||||
GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", | GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p", | ||||
name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); | name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr); | ||||
@@ -67,12 +67,9 @@ class ZeroCopyTask { | |||||
* @brief Set user data addr to Task param. | * @brief Set user data addr to Task param. | ||||
* @param [in] addr: virtual address value from Op. | * @param [in] addr: virtual address value from Op. | ||||
* @param [in] buffer_addr: data buffer_addr from user. | * @param [in] buffer_addr: data buffer_addr from user. | ||||
* @param [in] batch_addrs: dynamic batch addr info. | |||||
* @param [in] batch_label: batch label. | |||||
* @return: 0 SUCCESS / others FAILED | * @return: 0 SUCCESS / others FAILED | ||||
*/ | */ | ||||
ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr, const map<string, set<uintptr_t>> &batch_addrs, | |||||
const string &batch_label); | |||||
ge::Status UpdateTaskParam(uintptr_t addr, void *buffer_addr); | |||||
/** | /** | ||||
* @ingroup ge | * @ingroup ge | ||||
@@ -91,9 +88,6 @@ class ZeroCopyTask { | |||||
return batch_label_; | return batch_label_; | ||||
} | } | ||||
protected: | |||||
bool CheckDynamicBatch(const map<string, set<uintptr_t>> &batch_addrs, const string &batch_label, uintptr_t addr); | |||||
private: | private: | ||||
const string name_; | const string name_; | ||||
@@ -23,25 +23,15 @@ | |||||
#include <sstream> | #include <sstream> | ||||
#include <string> | #include <string> | ||||
#include <thread> | #include <thread> | ||||
#include <utility> | |||||
#include "common/ge/ge_util.h" | |||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "common/thread_pool.h" | #include "common/thread_pool.h" | ||||
#include "common/util.h" | |||||
#include "external/graph/types.h" | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "framework/common/ge_inner_error_codes.h" | |||||
#include "framework/common/ge_types.h" | |||||
#include "analyzer/analyzer.h" | #include "analyzer/analyzer.h" | ||||
#include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "graph/common/transop_util.h" | #include "graph/common/transop_util.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/ge_global_options.h" | #include "graph/ge_global_options.h" | ||||
#include "graph/ge_local_context.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | |||||
#include "graph/manager/util/rt_context_util.h" | #include "graph/manager/util/rt_context_util.h" | ||||
#include "graph/partition/dynamic_shape_partition.h" | #include "graph/partition/dynamic_shape_partition.h" | ||||
#include "graph/passes/enter_pass.h" | #include "graph/passes/enter_pass.h" | ||||
@@ -61,8 +51,6 @@ | |||||
#include "graph/passes/dimension_adjust_pass.h" | #include "graph/passes/dimension_adjust_pass.h" | ||||
#include "graph/passes/dimension_compute_pass.h" | #include "graph/passes/dimension_compute_pass.h" | ||||
#include "graph/passes/flow_ctrl_pass.h" | #include "graph/passes/flow_ctrl_pass.h" | ||||
#include "graph/passes/hccl_group_pass.h" | |||||
#include "graph/passes/hccl_memcpy_pass.h" | |||||
#include "graph/passes/identity_pass.h" | #include "graph/passes/identity_pass.h" | ||||
#include "graph/passes/input_output_connection_identify_pass.h" | #include "graph/passes/input_output_connection_identify_pass.h" | ||||
#include "graph/passes/iterator_op_pass.h" | #include "graph/passes/iterator_op_pass.h" | ||||
@@ -77,7 +65,6 @@ | |||||
#include "graph/passes/permute_pass.h" | #include "graph/passes/permute_pass.h" | ||||
#include "graph/passes/prune_pass.h" | #include "graph/passes/prune_pass.h" | ||||
#include "graph/passes/ref_identity_delete_op_pass.h" | #include "graph/passes/ref_identity_delete_op_pass.h" | ||||
#include "graph/passes/replace_with_empty_const_pass.h" | |||||
#include "graph/passes/reshape_recovery_pass.h" | #include "graph/passes/reshape_recovery_pass.h" | ||||
#include "graph/passes/reshape_remove_pass.h" | #include "graph/passes/reshape_remove_pass.h" | ||||
#include "graph/passes/same_transdata_breadth_fusion_pass.h" | #include "graph/passes/same_transdata_breadth_fusion_pass.h" | ||||
@@ -87,13 +74,11 @@ | |||||
#include "graph/passes/switch_logic_remove_pass.h" | #include "graph/passes/switch_logic_remove_pass.h" | ||||
#include "graph/passes/switch_to_stream_switch_pass.h" | #include "graph/passes/switch_to_stream_switch_pass.h" | ||||
#include "graph/passes/transop_breadth_fusion_pass.h" | #include "graph/passes/transop_breadth_fusion_pass.h" | ||||
#include "graph/passes/transop_depth_fusion_pass.h" | |||||
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h" | #include "graph/passes/transop_nearby_allreduce_fusion_pass.h" | ||||
#include "graph/passes/transop_symmetry_elimination_pass.h" | #include "graph/passes/transop_symmetry_elimination_pass.h" | ||||
#include "graph/passes/transop_without_reshape_fusion_pass.h" | #include "graph/passes/transop_without_reshape_fusion_pass.h" | ||||
#include "graph/passes/transpose_transdata_pass.h" | #include "graph/passes/transpose_transdata_pass.h" | ||||
#include "graph/passes/variable_op_pass.h" | #include "graph/passes/variable_op_pass.h" | ||||
#include "graph/passes/variable_prepare_op_pass.h" | |||||
#include "graph/passes/variable_ref_delete_op_pass.h" | #include "graph/passes/variable_ref_delete_op_pass.h" | ||||
#include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | #include "graph/passes/variable_ref_useless_control_out_delete_pass.h" | ||||
#include "graph/passes/end_of_sequence_add_control_pass.h" | #include "graph/passes/end_of_sequence_add_control_pass.h" | ||||
@@ -104,9 +89,6 @@ | |||||
#include "graph/passes/memcpy_addr_async_pass.h" | #include "graph/passes/memcpy_addr_async_pass.h" | ||||
#include "graph/build/label_allocator.h" | #include "graph/build/label_allocator.h" | ||||
#include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
#include "graph/utils/type_utils.h" | |||||
#include "graph/graph_util.h" | |||||
#include "graph/types.h" | |||||
#include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "ir_build/atc_ir_common.h" | #include "ir_build/atc_ir_common.h" | ||||
@@ -550,7 +532,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
} | } | ||||
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | ||||
compute_graph->GetGraphID(), subgraph, compute_graph, session_id, | |||||
compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id, | |||||
GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
if (!f.valid()) { | if (!f.valid()) { | ||||
GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
@@ -565,7 +547,7 @@ Status GraphManager::OptimizeSubGraphWithMultiThreads(ComputeGraphPtr compute_gr | |||||
(void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | (void) AttrUtils::SetStr(subgraph->GetSubGraph(), ATTR_NAME_OP_COMPILE_STRATEGY, op_compile_strategy); | ||||
} | } | ||||
std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | std::future<Status> f = executor.commit(GraphManager::ProcessSubGraphWithMultiThreads, this, | ||||
compute_graph->GetGraphID(), subgraph, compute_graph, session_id, | |||||
compute_graph->GetGraphID(), subgraph, compute_graph->GetName(), session_id, | |||||
GetThreadLocalContext()); | GetThreadLocalContext()); | ||||
if (!f.valid()) { | if (!f.valid()) { | ||||
GELOGE(FAILED, "Future is invalid"); | GELOGE(FAILED, "Future is invalid"); | ||||
@@ -2471,7 +2453,8 @@ Status GraphManager::CheckAndReleaseMemory(const GeModelPtr &ge_model, const Gra | |||||
Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, | Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, | ||||
const SubGraphInfoPtr &sub_graph_info_ptr, | const SubGraphInfoPtr &sub_graph_info_ptr, | ||||
const ComputeGraphPtr &compute_graph, uint64_t session_id, | |||||
const std::string &root_graph_name, | |||||
uint64_t session_id, | |||||
const GEThreadLocalContext &ge_context) { | const GEThreadLocalContext &ge_context) { | ||||
if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { | if (sub_graph_info_ptr != nullptr && graph_manager != nullptr) { | ||||
GetContext().SetSessionId(session_id); | GetContext().SetSessionId(session_id); | ||||
@@ -2488,9 +2471,13 @@ Status GraphManager::ProcessSubGraphWithMultiThreads(GraphManager *graph_manager | |||||
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); | GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_ID for subgraph, graph_id: %u.", root_graph_id); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
if (!AttrUtils::SetStr(*compute_graph_tmp, ATTR_NAME_ROOT_GRAPH_NAME, root_graph_name)) { | |||||
GELOGE(FAILED, "Failed to set attr ATTR_NAME_ROOT_GRAPH_NAME for subgraph, \ | |||||
root_graph_name: %s.", root_graph_name.c_str()); | |||||
return FAILED; | |||||
} | |||||
compute_graph_tmp->SetSessionID(session_id); | compute_graph_tmp->SetSessionID(session_id); | ||||
Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, | Status ret = graph_manager->GetCompilerStages(root_graph_id).optimizer.OptimizeSubGraph(compute_graph_tmp, | ||||
compute_graph, | |||||
engine_name); | engine_name); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); | GELOGE(ret, "SubGraph optimize Failed %s", engine_name.c_str()); | ||||
@@ -219,7 +219,8 @@ class GraphManager { | |||||
static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, | static Status ProcessSubGraphWithMultiThreads(GraphManager *graph_manager, GraphId root_graph_id, | ||||
const SubGraphInfoPtr &sub_graph_info_ptr, | const SubGraphInfoPtr &sub_graph_info_ptr, | ||||
const ComputeGraphPtr &compute_graph, uint64_t session_id, | |||||
const std::string &root_graph_name, | |||||
uint64_t session_id, | |||||
const GEThreadLocalContext &ge_context); | const GEThreadLocalContext &ge_context); | ||||
Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor); | Status ParseInputsDims(const std::vector<InputTensorInfo> &input_tensor); | ||||
void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | void ParseInputsDimsForData(const std::vector<InputTensorInfo> &input_tensor); | ||||
@@ -16,10 +16,7 @@ | |||||
#include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
#include <set> | |||||
#include <string> | #include <string> | ||||
#include "framework/common/debug/ge_log.h" | |||||
#include "graph/manager/graph_caching_allocator.h" | #include "graph/manager/graph_caching_allocator.h" | ||||
#include "graph/manager/rdma_pool_allocator.h" | #include "graph/manager/rdma_pool_allocator.h" | ||||
@@ -63,7 +63,7 @@ Status RdmaRemoteRegister(const std::vector<HostVarInfo> &var_info, rtMemType_t | |||||
}); | }); | ||||
auto hcom_remote_mem_register = | auto hcom_remote_mem_register = | ||||
(HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "hcom_remote_access_mem_register"); | |||||
(HcclResult(*)(const MemRegisterAddr *, uint32_t))dlsym(handle, "HcomRegRemoteAccessMem"); | |||||
if (hcom_remote_mem_register == nullptr) { | if (hcom_remote_mem_register == nullptr) { | ||||
GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); | GELOGE(FAILED, "Failed to invoke hcom_remote_mem_register function."); | ||||
return FAILED; | return FAILED; | ||||
@@ -76,7 +76,7 @@ void AddNodeInputProperty(ComputeGraphPtr &compute_graph) { | |||||
} | } | ||||
} | } | ||||
Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph, | |||||
Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, | |||||
const std::string &engine_name) { | const std::string &engine_name) { | ||||
if (compute_graph == nullptr) { | if (compute_graph == nullptr) { | ||||
GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr."); | GELOGE(GE_GRAPH_OPTIMIZE_COMPUTE_GRAPH_NULL, "[OptimizeSubGraph]: compute_graph is nullptr."); | ||||
@@ -106,10 +106,6 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const Com | |||||
for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { | ||||
Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); | Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
auto root_graph = ge::GraphUtils::FindRootGraph(parent_graph); | |||||
if (root_graph != nullptr) { | |||||
ErrorManager::GetInstance().SaveMstuneCompileFailedMsg(root_graph->GetName()); | |||||
} | |||||
GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret); | GELOGE(ret, "[OptimizeSubGraph][OptimizeFusedGraphAfterGraphSlice]: graph optimize failed, ret:%d", ret); | ||||
return ret; | return ret; | ||||
} | } | ||||
@@ -42,8 +42,7 @@ class GraphOptimize { | |||||
~GraphOptimize() = default; | ~GraphOptimize() = default; | ||||
// subgraph optimize | // subgraph optimize | ||||
Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const ComputeGraphPtr &parent_graph, | |||||
const std::string &engine_name); | |||||
Status OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std::string &engine_name); | |||||
// original graph optimize | // original graph optimize | ||||
Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph); | Status OptimizeOriginalGraph(ComputeGraphPtr &compute_graph); | ||||
@@ -113,6 +113,17 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) { | |||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim}; | std::vector<int64_t> dynamic_shape_dims = {kDynamicShapeDim}; | ||||
GeShape dynamic_shape(dynamic_shape_dims); | GeShape dynamic_shape(dynamic_shape_dims); | ||||
bool reset_shape_flag = false; | |||||
if (ResetInputTensorShape(op_desc, dynamic_shape, reset_shape_flag) == SUCCESS && reset_shape_flag) { | |||||
(void)ResetOutputTensorShape(op_desc, dynamic_shape); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
Status DynamicSingleOpResetShapePass::ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, | |||||
bool &reset_shape_flag) { | |||||
reset_shape_flag = false; | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { | for (size_t i = 0; i < op_desc->GetAllInputsDesc().size(); i++) { | ||||
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i)); | ||||
GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
@@ -125,8 +136,14 @@ Status DynamicSingleOpResetShapePass::ResetOpShape(OpDescPtr &op_desc) { | |||||
if (CheckIfConstInput(input_desc)) { | if (CheckIfConstInput(input_desc)) { | ||||
continue; | continue; | ||||
} | } | ||||
reset_shape_flag = true; | |||||
input_desc->SetShape(dynamic_shape); | input_desc->SetShape(dynamic_shape); | ||||
} | } | ||||
return SUCCESS; | |||||
} | |||||
Status DynamicSingleOpResetShapePass::ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape) { | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) { | for (size_t i = 0; i < op_desc->GetAllOutputsDesc().size(); i++) { | ||||
auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i)); | auto output_desc = op_desc->MutableOutputDesc(static_cast<uint32_t>(i)); | ||||
GE_CHECK_NOTNULL(output_desc); | GE_CHECK_NOTNULL(output_desc); | ||||
@@ -27,6 +27,8 @@ class DynamicSingleOpResetShapePass : public GraphPass { | |||||
private: | private: | ||||
Status ResetOpShape(OpDescPtr &op_desc); | Status ResetOpShape(OpDescPtr &op_desc); | ||||
Status ResetInputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape, bool &reset_shape_flag); | |||||
Status ResetOutputTensorShape(OpDescPtr &op_desc, const GeShape &dynamic_shape); | |||||
Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu); | Status CheckAllAicpuNodes(const ComputeGraphPtr &graph, bool &is_not_aicpu); | ||||
bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc); | bool CheckIfConstInput(const GeTensorDescPtr &input_tensor_desc); | ||||
}; | }; | ||||
@@ -17,13 +17,8 @@ | |||||
#include "graph/passes/switch_to_stream_switch_pass.h" | #include "graph/passes/switch_to_stream_switch_pass.h" | ||||
#include <stack> | #include <stack> | ||||
#include "common/ge/ge_util.h" | #include "common/ge/ge_util.h" | ||||
#include "framework/common/debug/ge_log.h" | |||||
#include "framework/common/debug/log.h" | |||||
#include "framework/common/ge_inner_error_codes.h" | |||||
#include "framework/common/types.h" | |||||
#include "ge/ge_api_types.h" | #include "ge/ge_api_types.h" | ||||
#include "graph/common/omg_util.h" | #include "graph/common/omg_util.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
@@ -125,12 +120,13 @@ void SwitchToStreamSwitchPass::MarkCycleDependence( | |||||
if (visited.count(tmp_node) > 0) { | if (visited.count(tmp_node) > 0) { | ||||
continue; | continue; | ||||
} | } | ||||
GELOGD("MarkCycleDependence: tmp_node=%s.", tmp_node->GetName().c_str()); | |||||
for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) { | for (const NodePtr &out_node : tmp_node->GetOutAllNodes()) { | ||||
if (switch_nodes.find(out_node) == switch_nodes.end()) { | if (switch_nodes.find(out_node) == switch_nodes.end()) { | ||||
out_nodes.push(out_node); | out_nodes.push(out_node); | ||||
continue; | continue; | ||||
} | } | ||||
GELOGD("MarkCycleDependence: tmp_node=%s, switch_node=%s.", | |||||
tmp_node->GetName().c_str(), out_node->GetName().c_str()); | |||||
GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, | GE_IF_BOOL_EXEC(SetCyclicDependenceFlag(out_node) != SUCCESS, | ||||
GELOGW("set cyclic dependence attr failed."); return ); | GELOGW("set cyclic dependence attr failed."); return ); | ||||
auto map_iter = switch_cyclic_map_.find(out_node); | auto map_iter = switch_cyclic_map_.find(out_node); | ||||
@@ -602,7 +598,7 @@ Status SwitchToStreamSwitchPass::AddConstNode(const ComputeGraphPtr &graph, cons | |||||
/// | /// | ||||
Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, | Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_node, const NodePtr &cast_node, | ||||
const std::set<NodePtr> &same_cond_switch) { | const std::set<NodePtr> &same_cond_switch) { | ||||
GELOGI("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), | |||||
GELOGD("ModifySwitchInCtlEdges: switch_node=%s, active_node=%s", switch_node->GetName().c_str(), | |||||
cast_node->GetName().c_str()); | cast_node->GetName().c_str()); | ||||
std::string orig_switch_name = switch_node->GetName(); | std::string orig_switch_name = switch_node->GetName(); | ||||
OpDescPtr switch_desc = switch_node->GetOpDesc(); | OpDescPtr switch_desc = switch_node->GetOpDesc(); | ||||
@@ -653,7 +649,7 @@ Status SwitchToStreamSwitchPass::ModifySwitchInCtlEdges(const NodePtr &switch_no | |||||
/// | /// | ||||
Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch, | Status SwitchToStreamSwitchPass::ModifySwitchOutCtlEdges(const NodePtr &switch_node, const NodePtr &stream_switch, | ||||
const NodePtr &active_node) { | const NodePtr &active_node) { | ||||
GELOGI("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), | |||||
GELOGD("ModifySwitchOutCtlEdges: switch_node=%s, stream_switch=%s, active_node=%s", switch_node->GetName().c_str(), | |||||
stream_switch->GetName().c_str(), active_node->GetName().c_str()); | stream_switch->GetName().c_str(), active_node->GetName().c_str()); | ||||
auto find_res = switch_node_map_.find(switch_node); | auto find_res = switch_node_map_.find(switch_node); | ||||
GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), { | GE_IF_BOOL_EXEC(find_res == switch_node_map_.end(), { | ||||
@@ -18,7 +18,6 @@ | |||||
#include <map> | #include <map> | ||||
#include <set> | #include <set> | ||||
#include <string> | #include <string> | ||||
#include <utility> | |||||
#include "common/formats/format_transfers/format_transfer_fractal_nz.h" | #include "common/formats/format_transfers/format_transfer_fractal_nz.h" | ||||
#include "common/formats/format_transfers/format_transfer_fractal_z.h" | #include "common/formats/format_transfers/format_transfer_fractal_z.h" | ||||
#include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h" | ||||
@@ -28,13 +27,9 @@ | |||||
#include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
#include "common/math/math_util.h" | #include "common/math/math_util.h" | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "common/util/error_manager/error_manager.h" | |||||
#include "common/formats/utils/formats_trans_utils.h" | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "graph/common/transop_util.h" | #include "graph/common/transop_util.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/shape_refiner.h" | #include "graph/shape_refiner.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
@@ -44,29 +39,21 @@ | |||||
#include "graph/passes/aicpu_constant_folding_pass.h" | #include "graph/passes/aicpu_constant_folding_pass.h" | ||||
#include "graph/passes/assert_pass.h" | #include "graph/passes/assert_pass.h" | ||||
#include "graph/passes/assign_pass.h" | #include "graph/passes/assign_pass.h" | ||||
#include "graph/passes/base_pass.h" | |||||
#include "graph/passes/common_subexpression_elimination_pass.h" | #include "graph/passes/common_subexpression_elimination_pass.h" | ||||
#include "graph/passes/cond_pass.h" | #include "graph/passes/cond_pass.h" | ||||
#include "graph/passes/cond_remove_pass.h" | #include "graph/passes/cond_remove_pass.h" | ||||
#include "graph/passes/constant_folding_pass.h" | #include "graph/passes/constant_folding_pass.h" | ||||
#include "graph/passes/constant_fuse_same_pass.h" | |||||
#include "graph/passes/control_trigger_pass.h" | |||||
#include "graph/passes/dimension_adjust_pass.h" | #include "graph/passes/dimension_adjust_pass.h" | ||||
#include "graph/passes/dimension_compute_pass.h" | #include "graph/passes/dimension_compute_pass.h" | ||||
#include "graph/passes/dropout_pass.h" | #include "graph/passes/dropout_pass.h" | ||||
#include "graph/passes/enter_pass.h" | #include "graph/passes/enter_pass.h" | ||||
#include "graph/passes/flow_ctrl_pass.h" | |||||
#include "graph/passes/for_pass.h" | #include "graph/passes/for_pass.h" | ||||
#include "graph/passes/get_original_format_pass.h" | |||||
#include "graph/passes/guarantee_const_pass.h" | #include "graph/passes/guarantee_const_pass.h" | ||||
#include "graph/passes/hccl_group_pass.h" | #include "graph/passes/hccl_group_pass.h" | ||||
#include "graph/passes/hccl_memcpy_pass.h" | #include "graph/passes/hccl_memcpy_pass.h" | ||||
#include "graph/passes/identity_pass.h" | #include "graph/passes/identity_pass.h" | ||||
#include "graph/passes/infershape_pass.h" | #include "graph/passes/infershape_pass.h" | ||||
#include "graph/passes/iterator_op_pass.h" | |||||
#include "graph/passes/merge_pass.h" | |||||
#include "graph/passes/net_output_pass.h" | #include "graph/passes/net_output_pass.h" | ||||
#include "graph/passes/next_iteration_pass.h" | |||||
#include "graph/passes/no_use_reshape_remove_pass.h" | #include "graph/passes/no_use_reshape_remove_pass.h" | ||||
#include "graph/passes/parallel_concat_start_op_pass.h" | #include "graph/passes/parallel_concat_start_op_pass.h" | ||||
#include "graph/passes/placeholder_with_default_pass.h" | #include "graph/passes/placeholder_with_default_pass.h" | ||||
@@ -81,45 +68,18 @@ | |||||
#include "graph/passes/shape_operate_op_remove_pass.h" | #include "graph/passes/shape_operate_op_remove_pass.h" | ||||
#include "graph/passes/snapshot_pass.h" | #include "graph/passes/snapshot_pass.h" | ||||
#include "graph/passes/stop_gradient_pass.h" | #include "graph/passes/stop_gradient_pass.h" | ||||
#include "graph/passes/subgraph_pass.h" | |||||
#include "graph/passes/switch_data_edges_bypass.h" | |||||
#include "graph/passes/switch_dead_branch_elimination.h" | |||||
#include "graph/passes/switch_logic_remove_pass.h" | |||||
#include "graph/passes/merge_to_stream_merge_pass.h" | |||||
#include "graph/passes/switch_to_stream_switch_pass.h" | |||||
#include "graph/passes/attach_stream_label_pass.h" | |||||
#include "graph/passes/unused_const_pass.h" | #include "graph/passes/unused_const_pass.h" | ||||
#include "graph/passes/unused_op_remove_pass.h" | |||||
#include "graph/passes/var_is_initialized_op_pass.h" | #include "graph/passes/var_is_initialized_op_pass.h" | ||||
#include "graph/passes/variable_prepare_op_pass.h" | #include "graph/passes/variable_prepare_op_pass.h" | ||||
#include "graph/preprocess/insert_op/util_insert_aipp_op.h" | #include "graph/preprocess/insert_op/util_insert_aipp_op.h" | ||||
#include "graph/types.h" | |||||
#include "graph/utils/tensor_utils.h" | |||||
#include "graph/utils/type_utils.h" | #include "graph/utils/type_utils.h" | ||||
#include "inc/pass_manager.h" | #include "inc/pass_manager.h" | ||||
#include "init/gelib.h" | #include "init/gelib.h" | ||||
#include "multi_batch_copy_graph.h" | #include "multi_batch_copy_graph.h" | ||||
#include "runtime/dev.h" | |||||
#include "graph/passes/dimension_adjust_pass.h" | |||||
#include "graph/passes/link_gen_mask_nodes_pass.h" | |||||
#include "graph/passes/permute_pass.h" | |||||
#include "graph/passes/reshape_remove_pass.h" | |||||
#include "graph/passes/same_transdata_breadth_fusion_pass.h" | |||||
#include "graph/passes/transop_breadth_fusion_pass.h" | |||||
#include "graph/passes/transop_depth_fusion_pass.h" | |||||
#include "graph/passes/transop_nearby_allreduce_fusion_pass.h" | |||||
#include "graph/passes/cast_remove_pass.h" | |||||
#include "graph/passes/data_pass.h" | #include "graph/passes/data_pass.h" | ||||
#include "graph/passes/transop_without_reshape_fusion_pass.h" | |||||
#include "graph/passes/transpose_transdata_pass.h" | |||||
#include "graph/passes/variable_op_pass.h" | |||||
#include "graph/passes/variable_prepare_op_pass.h" | |||||
#include "graph/passes/variable_ref_delete_op_pass.h" | |||||
#include "graph/passes/mark_agnostic_pass.h" | #include "graph/passes/mark_agnostic_pass.h" | ||||
namespace ge { | namespace ge { | ||||
namespace { | namespace { | ||||
static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | static std::map<std::string, ge::DataType> output_type_str_to_datatype = { | ||||
@@ -1407,11 +1407,13 @@ Status MultiBatchGraphCopyer::InsertIdentityAfterSwitchN() { | |||||
} | } | ||||
Status ProcessMultiBatch(ComputeGraphPtr &graph) { | Status ProcessMultiBatch(ComputeGraphPtr &graph) { | ||||
const char *multi_batch_with_case = std::getenv("MULTI_BATCH_WITH_CASE"); | |||||
if (multi_batch_with_case != nullptr) { | |||||
PassManager pass_manager; | |||||
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||||
return pass_manager.Run(graph); | |||||
if (GetLocalOmgContext().dynamic_node_type.empty()) { | |||||
const char *multi_batch_with_switchn = std::getenv("MULTI_BATCH_WITH_SWITCHN"); | |||||
if (multi_batch_with_switchn == nullptr) { | |||||
PassManager pass_manager; | |||||
GE_CHK_STATUS_RET(pass_manager.AddPass("MultiBatchClonePass", new (std::nothrow) MultiBatchClonePass)); | |||||
return pass_manager.Run(graph); | |||||
} | |||||
} | } | ||||
if (!GetLocalOmgContext().need_multi_batch) { | if (!GetLocalOmgContext().need_multi_batch) { | ||||
GELOGI("No need to process_multi for no_train graph."); | GELOGI("No need to process_multi for no_train graph."); | ||||
@@ -18,6 +18,7 @@ | |||||
#include <chrono> | #include <chrono> | ||||
#include "framework/common/debug/log.h" | #include "framework/common/debug/log.h" | ||||
#include "graph/compute_graph.h" | #include "graph/compute_graph.h" | ||||
#include "graph/utils/tensor_utils.h" | |||||
#include "hybrid_execution_context.h" | #include "hybrid_execution_context.h" | ||||
#include "subgraph_context.h" | #include "subgraph_context.h" | ||||
@@ -35,29 +36,31 @@ ShapeInferenceState::ShapeInferenceState(const NodeItem &node_item) : node_item( | |||||
this->num_pending_shapes_); | this->num_pending_shapes_); | ||||
} | } | ||||
Status ShapeInferenceState::UpdateInputShape(int idx, | |||||
const GeShape &ori_shape, | |||||
const GeShape &shape) { | |||||
Status ShapeInferenceState::UpdateInputShape(int idx, const GeTensorDesc &target) { | |||||
if (node_item.IsInputShapeStatic(idx)) { | if (node_item.IsInputShapeStatic(idx)) { | ||||
GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | GELOGD("[%s] Trying to update static shape, idx = %d. old shape = [%s], new shape = [%s]", | ||||
node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
idx, | idx, | ||||
node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), | node_item.MutableInputDesc(idx)->GetShape().ToString().c_str(), | ||||
shape.ToString().c_str()); | |||||
target.GetShape().ToString().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s]", | |||||
int64_t tensor_size = -1; | |||||
(void) TensorUtils::GetSize(target, tensor_size); | |||||
GELOGD("[%s] Update input shape [%d] with Shape: [%s] and OriginalShape: [%s], size = %ld", | |||||
node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
idx, | idx, | ||||
shape.ToString().c_str(), | |||||
ori_shape.ToString().c_str()); | |||||
target.GetShape().ToString().c_str(), | |||||
target.GetOriginShape().ToString().c_str(), | |||||
tensor_size); | |||||
std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
auto tensor_desc = node_item.MutableInputDesc(idx); | auto tensor_desc = node_item.MutableInputDesc(idx); | ||||
GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
tensor_desc->SetShape(shape); | |||||
tensor_desc->SetOriginShape(ori_shape); | |||||
tensor_desc->SetShape(target.GetShape()); | |||||
tensor_desc->SetOriginShape(target.GetOriginShape()); | |||||
(void) TensorUtils::SetSize(*tensor_desc, tensor_size); | |||||
if (--num_pending_shapes_ == 0) { | if (--num_pending_shapes_ == 0) { | ||||
ready_cv_.notify_all(); | ready_cv_.notify_all(); | ||||
} | } | ||||
@@ -110,24 +113,24 @@ Status ShapeInferenceState::AwaitShapesReady(const GraphExecutionContext &contex | |||||
for (auto &p : shape_futures) { | for (auto &p : shape_futures) { | ||||
auto idx = p.first; | auto idx = p.first; | ||||
auto &future = p.second; | auto &future = p.second; | ||||
GeShape shape; | |||||
GeShape ori_shape; | |||||
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); | RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] Start", idx); | ||||
GE_CHK_STATUS_RET(future.Get(ori_shape, shape), | |||||
"[%s] Get shape failed. index = %u", | |||||
node_item.NodeName().c_str(), | |||||
idx); | |||||
auto src_tensor_desc = future.GetTensorDesc(); | |||||
GE_CHECK_NOTNULL(src_tensor_desc); | |||||
RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); | RECORD_SHAPE_INFERENCE_EVENT(&context, node_item.NodeName().c_str(), "[AwaitShape] [idx = %u] End", idx); | ||||
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s]", | |||||
node_item.NodeName().c_str(), | |||||
idx, | |||||
shape.ToString().c_str(), | |||||
ori_shape.ToString().c_str()); | |||||
auto input_desc = node_item.MutableInputDesc(idx); | auto input_desc = node_item.MutableInputDesc(idx); | ||||
GE_CHECK_NOTNULL(input_desc); | GE_CHECK_NOTNULL(input_desc); | ||||
input_desc->SetShape(std::move(shape)); | |||||
input_desc->SetOriginShape(ori_shape); | |||||
int64_t tensor_size = -1; | |||||
(void) TensorUtils::GetSize(*src_tensor_desc, tensor_size); | |||||
GELOGD("[%s] Update input shape [%u] with shape: [%s] and ori_shape: [%s], index = %zu", | |||||
node_item.NodeName().c_str(), | |||||
idx, | |||||
src_tensor_desc->GetShape().ToString().c_str(), | |||||
src_tensor_desc->GetOriginShape().ToString().c_str(), | |||||
tensor_size); | |||||
input_desc->SetShape(src_tensor_desc->GetShape()); | |||||
input_desc->SetOriginShape(src_tensor_desc->GetOriginShape()); | |||||
(void) TensorUtils::SetSize(*input_desc, tensor_size); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
@@ -190,5 +193,14 @@ Status ShapeFuture::Get(GeShape &ori_shape, GeShape &shape) { | |||||
GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); | GELOGD("Get shape from %s:%u. shape = [%s]", src_node_->GetName().c_str(), src_index_, shape.ToString().c_str()); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/// Awaits src_node_ through the subgraph context and then returns the output
/// tensor descriptor this future points at.
///
/// @return the mutable output descriptor at src_index_ of src_node_, or nullptr
///         when Await() reports failure or the node carries no OpDesc.
///         Callers are expected to null-check the result.
GeTensorDescPtr ShapeFuture::GetTensorDesc() {
  GELOGD("Start to wait node: %s for getting shape", src_node_->GetName().c_str());
  if (!subgraph_context_->Await(src_node_)) {
    GELOGE(INTERNAL_ERROR, "cancelled");
    return nullptr;
  }

  // Guard the dereference: a node without an OpDesc must surface as a null
  // result rather than a crash.
  const auto op_desc = src_node_->GetOpDesc();
  if (op_desc == nullptr) {
    GELOGE(INTERNAL_ERROR, "[%s] OpDesc is null", src_node_->GetName().c_str());
    return nullptr;
  }
  return op_desc->MutableOutputDesc(src_index_);
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -35,6 +35,7 @@ class ShapeFuture { | |||||
ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); | ShapeFuture(NodePtr src_node, uint32_t src_index, SubgraphContext *subgraph_context); | ||||
~ShapeFuture() = default; | ~ShapeFuture() = default; | ||||
Status Get(GeShape &ori_shape, GeShape &shape); | Status Get(GeShape &ori_shape, GeShape &shape); | ||||
GeTensorDescPtr GetTensorDesc(); | |||||
private: | private: | ||||
NodePtr src_node_; | NodePtr src_node_; | ||||
@@ -45,7 +46,7 @@ class ShapeFuture { | |||||
struct ShapeInferenceState { | struct ShapeInferenceState { | ||||
explicit ShapeInferenceState(const NodeItem &node_item); | explicit ShapeInferenceState(const NodeItem &node_item); | ||||
Status UpdateInputShape(int idx, const GeShape &ori_shape, const GeShape &shape); | |||||
Status UpdateInputShape(int idx, const GeTensorDesc &tensor_desc); | |||||
void UpdateInputShapeFuture(int idx, ShapeFuture &&future); | void UpdateInputShapeFuture(int idx, ShapeFuture &&future); | ||||
@@ -96,7 +96,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue | |||||
GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); | auto node_state = subgraph_context_->GetOrCreateNodeState(input_node); | ||||
GE_CHECK_NOTNULL(node_state); | GE_CHECK_NOTNULL(node_state); | ||||
node_state->GetShapeInferenceState().UpdateInputShape(0, tensor_desc->GetOriginShape(), tensor_desc->GetShape()); | |||||
node_state->GetShapeInferenceState().UpdateInputShape(0, *tensor_desc); | |||||
} | } | ||||
} | } | ||||
@@ -268,13 +268,6 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta | |||||
} else { | } else { | ||||
node_state.SetKernelTask(node_item.kernel_task); | node_state.SetKernelTask(node_item.kernel_task); | ||||
} | } | ||||
GELOGD("[%s] Start to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||||
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||||
GE_CHK_STATUS_RET(NodeExecutorManager::GetInstance().CalcOpRunningParam(*node_item.node), | |||||
"[%s] Failed to invoke CalcOpRunningParam.", node_item.NodeName().c_str()); | |||||
RECORD_COMPILE_EVENT(ctx, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||||
GELOGD("[%s] Done invoking CalcOpRunningParam successfully.", node_item.NodeName().c_str()); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -20,12 +20,9 @@ | |||||
#include "graph/utils/tensor_adapter.h" | #include "graph/utils/tensor_adapter.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
#include "common/dump/dump_manager.h" | |||||
#include "hybrid/executor//worker//shape_inference_engine.h" | |||||
#include "common/dump/dump_op.h" | #include "common/dump/dump_op.h" | ||||
#include "common/types.h" | |||||
#include "common/ge_types.h" | |||||
#include "common/profiling/profiling_manager.h" | #include "common/profiling/profiling_manager.h" | ||||
#include "runtime/base.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -348,6 +345,10 @@ Status NodeDoneCallback::OnNodeDone() { | |||||
} | } | ||||
GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); | GE_CHK_STATUS_RET_NOLOG(PrepareConstInputs(node_item)); | ||||
if (node_item.shape_inference_type == DEPEND_SHAPE_RANGE || node_item.shape_inference_type == DEPEND_COMPUTE) { | |||||
// update output tensor sizes | |||||
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(node_item)); | |||||
} | |||||
// PropagateOutputs for type == DEPEND_COMPUTE | // PropagateOutputs for type == DEPEND_COMPUTE | ||||
if (node_item.shape_inference_type == DEPEND_COMPUTE) { | if (node_item.shape_inference_type == DEPEND_COMPUTE) { | ||||
if (graph_context_->trace_enabled) { | if (graph_context_->trace_enabled) { | ||||
@@ -17,9 +17,15 @@ | |||||
#include "hybrid/executor/worker/shape_inference_engine.h" | #include "hybrid/executor/worker/shape_inference_engine.h" | ||||
#include "graph/shape_refiner.h" | #include "graph/shape_refiner.h" | ||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | |||||
#include "graph/utils/type_utils.h" | |||||
#include "common/math/math_util.h" | |||||
#include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
namespace ge { | namespace ge { | ||||
namespace {
// Granularity that CalcTensorSize rounds every computed tensor size up to.
constexpr int kAlignment = 32;
}  // namespace
namespace hybrid { | namespace hybrid { | ||||
ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) | ShapeInferenceEngine::ShapeInferenceEngine(GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) | ||||
: execution_context_(execution_context), | : execution_context_(execution_context), | ||||
@@ -40,7 +46,9 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
} | } | ||||
if (node_item.fused_subgraph != nullptr) { | if (node_item.fused_subgraph != nullptr) { | ||||
return InferShapeForSubgraph(node_item, *node_item.fused_subgraph); | |||||
GE_CHK_STATUS_RET_NOLOG(InferShapeForSubgraph(node_item, *node_item.fused_subgraph)); | |||||
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item)); | |||||
return SUCCESS; | |||||
} | } | ||||
// Skip shape inference for node of type DEPEND_COMPUTE | // Skip shape inference for node of type DEPEND_COMPUTE | ||||
@@ -63,21 +71,15 @@ Status ShapeInferenceEngine::InferShape(NodeState &node_state) { | |||||
std::lock_guard<std::mutex> lk(mu_); | std::lock_guard<std::mutex> lk(mu_); | ||||
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] Start"); | ||||
GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), | GE_CHK_STATUS_RET(ShapeRefiner::InferShapeAndTypeForRunning(node_item.node, true), | ||||
"Invoke InferShapeAndType failed."); | |||||
"Invoke InferShapeAndType failed."); | |||||
RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | RECORD_SHAPE_INFERENCE_EVENT(execution_context_, node_item.NodeName().c_str(), "[InferShapeAndType] End"); | ||||
} | } | ||||
// Check again to make sure shape is valid after shape inference | |||||
if (node_item.shape_inference_type != DEPEND_SHAPE_RANGE) { | |||||
bool is_unknown_shape = false; | |||||
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node_item.node, is_unknown_shape), | |||||
"Failed to get shape status. node = %s", | |||||
node_item.NodeName().c_str()); | |||||
GE_CHK_BOOL_RET_STATUS(!is_unknown_shape, | |||||
INTERNAL_ERROR, | |||||
"[%s] Shape is still unknown after shape inference.", | |||||
node_item.NodeName().c_str()); | |||||
} | |||||
// update output tensor sizes after shape inference | |||||
// error if shape is still unknown and not of type DEPEND_SHAPE_RANGE | |||||
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] Start"); | |||||
GE_CHK_STATUS_RET_NOLOG(CalcOutputTensorSizes(node_item, node_item.shape_inference_type == DEPEND_SHAPE_RANGE)); | |||||
RECORD_COMPILE_EVENT(execution_context_, node_item.NodeName().c_str(), "[CalcOpRunningParam] End"); | |||||
GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", | GELOGD("[%s] [HybridTrace] After shape inference. Node = %s", | ||||
node_item.NodeName().c_str(), | node_item.NodeName().c_str(), | ||||
@@ -127,8 +129,6 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||||
// propagate each output | // propagate each output | ||||
for (int i = 0; i < node_item.num_outputs; ++i) { | for (int i = 0; i < node_item.num_outputs; ++i) { | ||||
auto output_desc = node_item.op_desc->MutableOutputDesc(i); | auto output_desc = node_item.op_desc->MutableOutputDesc(i); | ||||
const auto &shape = output_desc->MutableShape(); | |||||
const auto &ori_shape = output_desc->GetOriginShape(); | |||||
auto &output_nodes = node_item.outputs[i]; | auto &output_nodes = node_item.outputs[i]; | ||||
// propagate output to all sub-inputs | // propagate output to all sub-inputs | ||||
@@ -149,9 +149,7 @@ Status ShapeInferenceEngine::PropagateOutputShapes(const NodeItem &node_item) { | |||||
infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, | infer_state.UpdateInputShapeFuture(dst_input_index_and_node.first, | ||||
std::move(future)); | std::move(future)); | ||||
} else { | } else { | ||||
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, | |||||
ori_shape, | |||||
shape)); | |||||
GE_CHK_STATUS_RET_NOLOG(infer_state.UpdateInputShape(dst_input_index_and_node.first, *output_desc)); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -230,5 +228,92 @@ Status ShapeInferenceEngine::UpdatePeerNodeShape(const Node &node) { | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
/// Replaces unknown dimensions in `shape` using the shape range stored on
/// `tensor_desc`.
///
/// @param tensor_desc          descriptor whose shape and shape range are read
/// @param shape                in/out copy of the dims; entries equal to
///                             ge::UNKNOWN_DIM are overwritten in place
/// @param fallback_with_range  when false, an unknown shape is an error
/// @return SUCCESS if the shape was already known or was filled from the range;
///         INTERNAL_ERROR if the shape is unknown and the fallback is disabled,
///         or if the number of ranges differs from the number of dims; the
///         status of GetShapeRange if that call fails.
Status ShapeInferenceEngine::CanonicalizeShape(GeTensorDesc &tensor_desc,
                                               std::vector<int64_t> &shape,
                                               bool fallback_with_range) {
  const auto &current_shape = tensor_desc.MutableShape();
  // Nothing to do when every dimension is already known.
  if (!current_shape.IsUnknownShape()) {
    return SUCCESS;
  }

  if (!fallback_with_range) {
    GELOGE(INTERNAL_ERROR, "Output shape is still unknown after shape inference. shape = [%s]",
           current_shape.ToString().c_str());
    return INTERNAL_ERROR;
  }

  GELOGD("Calc output size by range");
  std::vector<std::pair<int64_t, int64_t>> ranges;
  GE_CHK_GRAPH_STATUS_RET(tensor_desc.GetShapeRange(ranges), "Failed to get shape range");

  const size_t num_dims = shape.size();
  if (ranges.size() != num_dims) {
    GELOGE(INTERNAL_ERROR, "Number of shape ranges (%zu) mismatches that of dims (%zu)",
           ranges.size(),
           num_dims);
    return INTERNAL_ERROR;
  }

  // Each unknown dim takes the second element of its range pair
  // (presumably the upper bound -- TODO confirm against GetShapeRange).
  for (size_t i = 0; i < num_dims; ++i) {
    int64_t &dim_value = shape[i];
    if (dim_value == ge::UNKNOWN_DIM) {
      dim_value = ranges[i].second;
    }
  }

  GELOGD("After canonicalization, shape = [%s], before = [%s]",
         GeShape(shape).ToString().c_str(),
         current_shape.ToString().c_str());
  return SUCCESS;
}
/// Computes the size of a tensor from its element type and dims, rounded up to
/// a multiple of kAlignment.
///
/// @param data_type    element type; its length is looked up via TypeUtils
/// @param shape        dims to multiply; every dim must be >= 0
/// @param tensor_size  out: element length * product(dims), aligned upwards
/// @return SUCCESS on success; INTERNAL_ERROR if the type length is unavailable;
///         otherwise whatever the dim check or the overflow checks return.
Status ShapeInferenceEngine::CalcTensorSize(DataType data_type,
                                            const std::vector<int64_t> &shape,
                                            int64_t &tensor_size) {
  GELOGD("To calc tensor size by shape = [%s]", GeShape(shape).ToString().c_str());

  uint32_t element_length = 0;
  const bool length_found = TypeUtils::GetDataTypeLength(data_type, element_length);
  if (!length_found) {
    GELOGE(INTERNAL_ERROR, "Failed to get data type size");
    return INTERNAL_ERROR;
  }

  // Seed with the element length, then fold in one dimension at a time so the
  // overflow check runs before every multiplication.
  tensor_size = element_length;
  for (const int64_t dim : shape) {
    GE_CHECK_GE(dim, 0);
    GE_CHK_STATUS_RET(Int64MulCheckOverflow(tensor_size, dim),
                      "Shape size overflow, shape = [%s]",
                      GeShape(shape).ToString().c_str());
    tensor_size *= dim;
  }

  // The round-up below adds kAlignment - 1 first, so verify that is safe.
  GE_CHK_STATUS_RET(CheckInt64AddOverflow(tensor_size, kAlignment - 1),
                    "Tensor size is too large: %ld, shape = [%s]",
                    tensor_size,
                    GeShape(shape).ToString().c_str());
  const int64_t num_aligned_units = (tensor_size + kAlignment - 1) / kAlignment;
  tensor_size = num_aligned_units * kAlignment;
  return SUCCESS;
}
/// Recomputes and stores the tensor size of every output of `node_item`.
///
/// For each output descriptor the dims are copied, canonicalized via
/// CanonicalizeShape, turned into a size by CalcTensorSize, and written back
/// with TensorUtils::SetSize.
///
/// @param node_item            node whose output descriptors are updated
/// @param fallback_with_range  forwarded to CanonicalizeShape; when false an
///                             unknown output shape is reported as an error
/// @return SUCCESS if every output was processed; otherwise the first failing
///         status (null OpDesc / descriptor, canonicalization or size failure).
Status ShapeInferenceEngine::CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range) {
  auto op_desc = node_item.GetOpDesc();
  // Validate before use, matching how other callers of GetOpDesc() behave.
  GE_CHECK_NOTNULL(op_desc);
  for (size_t output_index = 0; output_index < op_desc->GetOutputsSize(); ++output_index) {
    auto tensor_desc = op_desc->MutableOutputDesc(output_index);
    GE_CHECK_NOTNULL(tensor_desc);
    // modify on copy: the descriptor's own shape stays untouched
    auto dims = tensor_desc->MutableShape().GetDims();
    GE_CHK_STATUS_RET(CanonicalizeShape(*tensor_desc, dims, fallback_with_range),
                      "[%s] Failed to canonicalize shape for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);

    int64_t tensor_size = 0;
    GE_CHK_STATUS_RET(CalcTensorSize(tensor_desc->GetDataType(), dims, tensor_size),
                      "[%s] Failed to calc tensor size for output %zu",
                      node_item.NodeName().c_str(),
                      output_index);
    GELOGD("[%s] Tensor size of output %zu = %ld", node_item.NodeName().c_str(), output_index, tensor_size);
    (void) TensorUtils::SetSize(*tensor_desc, tensor_size);
  }

  return SUCCESS;
}
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -34,7 +34,11 @@ class ShapeInferenceEngine { | |||||
Status PropagateOutputShapes(const NodeItem &node_item); | Status PropagateOutputShapes(const NodeItem &node_item); | ||||
static Status CalcOutputTensorSizes(const NodeItem &node_item, bool fallback_with_range = false); | |||||
private: | private: | ||||
static Status CanonicalizeShape(GeTensorDesc &tensor_desc, std::vector<int64_t> &shape, bool fallback_with_range); | |||||
static Status CalcTensorSize(DataType data_type, const std::vector<int64_t> &shape, int64_t &tensor_size); | |||||
static Status UpdatePeerNodeShape(const Node &node); | static Status UpdatePeerNodeShape(const Node &node); | ||||
Status AwaitDependentNodes(NodeState &node_state); | Status AwaitDependentNodes(NodeState &node_state); | ||||
@@ -22,6 +22,7 @@ | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
#include "hybrid/node_executor/node_executor.h" | #include "hybrid/node_executor/node_executor.h" | ||||
#include "hybrid/executor/worker/shape_inference_engine.h" | |||||
namespace ge { | namespace ge { | ||||
namespace hybrid { | namespace hybrid { | ||||
@@ -47,7 +48,7 @@ Status ParseInputMapping(Node &node, OpDesc &op_desc, FusedSubgraph &fused_subgr | |||||
GE_CHECK_NOTNULL(dst_op_desc); | GE_CHECK_NOTNULL(dst_op_desc); | ||||
auto in_idx = node_and_anchor.second->GetIdx(); | auto in_idx = node_and_anchor.second->GetIdx(); | ||||
auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); | auto tensor_desc = dst_op_desc->MutableInputDesc(in_idx); | ||||
fused_subgraph.input_mapping[parent_index].emplace_back(tensor_desc); | |||||
fused_subgraph.input_mapping[static_cast<int>(parent_index)].emplace_back(tensor_desc); | |||||
GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); | GELOGD("Input[%u] mapped to [%s:%u]", parent_index, dst_op_desc->GetName().c_str(), in_idx); | ||||
} | } | ||||
@@ -64,7 +65,7 @@ Status ParseOutputMapping(const OpDescPtr &op_desc, FusedSubgraph &fused_subgrap | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
fused_subgraph.output_mapping.emplace(parent_index, op_desc); | |||||
fused_subgraph.output_mapping.emplace(static_cast<int>(parent_index), op_desc); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -126,12 +127,7 @@ Status NodeItem::Create(const NodePtr &node, std::unique_ptr<NodeItem> &node_ite | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status NodeItem::Init() { | |||||
GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX); | |||||
GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX); | |||||
num_inputs = static_cast<int>(op_desc->GetInputsSize()); | |||||
num_outputs = static_cast<int>(op_desc->GetOutputsSize()); | |||||
void NodeItem::ResolveOptionalInputs() { | |||||
if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { | if (op_desc->GetAllInputsSize() != op_desc->GetInputsSize()) { | ||||
has_optional_inputs = true; | has_optional_inputs = true; | ||||
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { | ||||
@@ -143,7 +139,18 @@ Status NodeItem::Init() { | |||||
} | } | ||||
} | } | ||||
} | } | ||||
} | |||||
/// Caches the input/output counts of op_desc as ints and resolves optional
/// inputs.
///
/// Both counts are range-checked against INT32_MAX before either member is
/// written, so a failed check leaves num_inputs / num_outputs untouched.
///
/// @return SUCCESS, or the status produced by GE_CHECK_LE when a count does not
///         fit into an int.
Status NodeItem::InitInputsAndOutputs() {
  GE_CHECK_LE(op_desc->GetInputsSize(), INT32_MAX);
  GE_CHECK_LE(op_desc->GetOutputsSize(), INT32_MAX);

  const auto input_count = op_desc->GetInputsSize();
  const auto output_count = op_desc->GetOutputsSize();
  this->num_inputs = static_cast<int>(input_count);
  this->num_outputs = static_cast<int>(output_count);

  ResolveOptionalInputs();
  return SUCCESS;
}
Status NodeItem::ResolveDynamicState() { | |||||
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); | ||||
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); | ||||
if (!is_dynamic) { | if (!is_dynamic) { | ||||
@@ -151,38 +158,54 @@ Status NodeItem::Init() { | |||||
"[%s] Failed to get shape status.", | "[%s] Failed to get shape status.", | ||||
node->GetName().c_str()); | node->GetName().c_str()); | ||||
} | } | ||||
return SUCCESS; | |||||
} | |||||
if (is_dynamic) { | |||||
for (int i = 0; i < num_inputs; ++i) { | |||||
const auto &input_desc = MutableInputDesc(i); | |||||
GE_CHECK_NOTNULL(input_desc); | |||||
if (input_desc->MutableShape().IsUnknownShape()) { | |||||
is_input_shape_static_.push_back(false); | |||||
} else { | |||||
num_static_input_shapes++; | |||||
is_input_shape_static_.push_back(true); | |||||
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||||
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||||
} | |||||
Status NodeItem::ResolveStaticInputsAndOutputs() { | |||||
for (int i = 0; i < num_inputs; ++i) { | |||||
const auto &input_desc = MutableInputDesc(i); | |||||
GE_CHECK_NOTNULL(input_desc); | |||||
if (input_desc->MutableShape().IsUnknownShape()) { | |||||
is_input_shape_static_.push_back(false); | |||||
} else { | |||||
num_static_input_shapes++; | |||||
is_input_shape_static_.push_back(true); | |||||
GELOGD("[%s] The shape of input[%d] is static. shape = [%s]", | |||||
NodeName().c_str(), i, input_desc->MutableShape().ToString().c_str()); | |||||
} | } | ||||
} | |||||
for (int i = 0; i < num_outputs; ++i) { | |||||
const auto &output_desc = op_desc->MutableOutputDesc(i); | |||||
GE_CHECK_NOTNULL(output_desc); | |||||
if (output_desc->MutableShape().IsUnknownShape()) { | |||||
is_output_shape_static = false; | |||||
break; | |||||
} | |||||
for (int i = 0; i < num_outputs; ++i) { | |||||
const auto &output_desc = op_desc->MutableOutputDesc(i); | |||||
GE_CHECK_NOTNULL(output_desc); | |||||
if (output_desc->MutableShape().IsUnknownShape()) { | |||||
is_output_shape_static = false; | |||||
break; | |||||
} | } | ||||
} | |||||
if (IsControlOp() || node_type == PARTITIONEDCALL) { | |||||
shape_inference_type = DEPEND_COMPUTE; | |||||
} else { | |||||
int32_t unknown_shape_type_val = 0; | |||||
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||||
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||||
} | |||||
if (is_output_shape_static) { | |||||
GE_CHK_STATUS_RET_NOLOG(ShapeInferenceEngine::CalcOutputTensorSizes(*this)); | |||||
} | |||||
return SUCCESS; | |||||
} | |||||
void NodeItem::ResolveUnknownShapeType() { | |||||
if (IsControlOp() || node_type == PARTITIONEDCALL) { | |||||
shape_inference_type = DEPEND_COMPUTE; | |||||
} else { | |||||
int32_t unknown_shape_type_val = 0; | |||||
(void) AttrUtils::GetInt(op_desc, ::ge::ATTR_NAME_UNKNOWN_SHAPE_TYPE, unknown_shape_type_val); | |||||
shape_inference_type = static_cast<UnknowShapeOpType>(unknown_shape_type_val); | |||||
} | |||||
} | |||||
Status NodeItem::Init() { | |||||
GE_CHK_STATUS_RET_NOLOG(InitInputsAndOutputs()); | |||||
GE_CHK_STATUS_RET_NOLOG(ResolveDynamicState()); | |||||
if (is_dynamic) { | |||||
ResolveUnknownShapeType(); | |||||
GE_CHK_STATUS_RET_NOLOG(ResolveStaticInputsAndOutputs()); | |||||
GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); | GE_CHK_STATUS_RET(ParseFusedSubgraph(*this), "[%s] Failed to parse fused subgraph", node_name.c_str()); | ||||
} | } | ||||
@@ -103,6 +103,11 @@ struct NodeItem { | |||||
private: | private: | ||||
explicit NodeItem(NodePtr node); | explicit NodeItem(NodePtr node); | ||||
Status Init(); | Status Init(); | ||||
Status InitInputsAndOutputs(); | |||||
void ResolveOptionalInputs(); | |||||
Status ResolveDynamicState(); | |||||
Status ResolveStaticInputsAndOutputs(); | |||||
void ResolveUnknownShapeType(); | |||||
std::vector<bool> is_input_shape_static_; | std::vector<bool> is_input_shape_static_; | ||||
std::vector<uint32_t> input_desc_indices_; | std::vector<uint32_t> input_desc_indices_; | ||||
@@ -42,10 +42,10 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||||
GELOGE(FAILED, "hccl handle is nullptr! "); | GELOGE(FAILED, "hccl handle is nullptr! "); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
auto EnqueueHcomOpertion = (HcclResult(*)(HcomOpertion, std::function<void(HcclResult status)>))dlsym( | |||||
context.handle_, "EnqueueHcomOpertion"); | |||||
if (EnqueueHcomOpertion == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke EnqueueHcomOpertion hcom unknown node function."); | |||||
auto HcomExecEnqueueOperation = (HcclResult(*)(HcomOperation, std::function<void(HcclResult status)>))dlsym( | |||||
context.handle_, "HcomExecEnqueueOperation"); | |||||
if (HcomExecEnqueueOperation == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExecEnqueueOperation hcom unknown node function."); | |||||
if (dlclose(context.handle_) != 0) { | if (dlclose(context.handle_) != 0) { | ||||
GELOGW("Failed to close handle %s", dlerror()); | GELOGW("Failed to close handle %s", dlerror()); | ||||
} | } | ||||
@@ -70,7 +70,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||||
const OpDescPtr op_desc = node_item.GetOpDesc(); | const OpDescPtr op_desc = node_item.GetOpDesc(); | ||||
GE_CHECK_NOTNULL(op_desc); | GE_CHECK_NOTNULL(op_desc); | ||||
HcomOpertion op_info; | |||||
HcomOperation op_info; | |||||
op_info.hcclType = op_desc->GetType(); | op_info.hcclType = op_desc->GetType(); | ||||
op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; | op_info.inputPtr = inputs.empty() ? nullptr : inputs[0]; | ||||
op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; | op_info.outputPtr = outputs.empty() ? nullptr : outputs[0]; | ||||
@@ -96,7 +96,7 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||||
op_info.root = root_id; | op_info.root = root_id; | ||||
auto callback = [this, op_desc](HcclResult status) { | auto callback = [this, op_desc](HcclResult status) { | ||||
if (status != HCCL_SUCCESS) { | if (status != HCCL_SUCCESS) { | ||||
GELOGE(HCCL_E_INTERNAL, "node %s call EnqueueHcomOpertion failed, ret: 0x%X", op_desc->GetName().c_str(), status); | |||||
GELOGE(HCCL_E_INTERNAL, "node %s call HcomExecEnqueueOperation failed, ret: 0x%X", op_desc->GetName().c_str(), status); | |||||
} | } | ||||
std::lock_guard<std::mutex> lock(this->hccl_mutex_); | std::lock_guard<std::mutex> lock(this->hccl_mutex_); | ||||
this->cond_.notify_all(); | this->cond_.notify_all(); | ||||
@@ -110,9 +110,9 @@ Status HcclNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||||
context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root); | context.GetNodeName(), op_info.hcclType.c_str(), count, op_info.dataType, op_info.opType, op_info.root); | ||||
op_info.count = count; | op_info.count = count; | ||||
HcclResult hccl_ret = EnqueueHcomOpertion(op_info, callback); | |||||
HcclResult hccl_ret = HcomExecEnqueueOperation(op_info, callback); | |||||
if (hccl_ret != HCCL_SUCCESS) { | if (hccl_ret != HCCL_SUCCESS) { | ||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||||
return HCCL_E_INTERNAL; | return HCCL_E_INTERNAL; | ||||
} | } | ||||
@@ -213,11 +213,11 @@ Status RdmaNodeTask::ExtractTensor(TaskContext &context, vector<HcomRemoteAccess | |||||
Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> done_callback) { | ||||
GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); | GELOGI("[%s] RdmaNodeTask::ExecuteAsync in.", context.GetNodeName()); | ||||
auto EnqueueRemoteAccess = | |||||
auto HcomExecEnqueueRemoteAccess = | |||||
(HcclResult(*)(const string &, const vector<HcomRemoteAccessAddrInfo> &, | (HcclResult(*)(const string &, const vector<HcomRemoteAccessAddrInfo> &, | ||||
std::function<void(HcclResult status)>))dlsym(context.handle_, "EnqueueRemoteAccess"); | |||||
if (EnqueueRemoteAccess == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke EnqueueRemoteAccess hcom unknown node function."); | |||||
std::function<void(HcclResult status)>))dlsym(context.handle_, "HcomExecEnqueueRemoteAccess"); | |||||
if (HcomExecEnqueueRemoteAccess == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExecEnqueueRemoteAccess hcom unknown node function."); | |||||
if (dlclose(context.handle_) != 0) { | if (dlclose(context.handle_) != 0) { | ||||
GELOGW("Failed to close handle %s", dlerror()); | GELOGW("Failed to close handle %s", dlerror()); | ||||
} | } | ||||
@@ -228,15 +228,15 @@ Status RdmaNodeTask::ExecuteAsync(TaskContext &context, std::function<void()> do | |||||
auto callback = [this](HcclResult status) { | auto callback = [this](HcclResult status) { | ||||
if (status != HCCL_SUCCESS) { | if (status != HCCL_SUCCESS) { | ||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", status); | |||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", status); | |||||
} | } | ||||
std::lock_guard<std::mutex> lock(this->hccl_mutex_); | std::lock_guard<std::mutex> lock(this->hccl_mutex_); | ||||
this->cond_.notify_all(); | this->cond_.notify_all(); | ||||
GELOGI("rdma callback success."); | GELOGI("rdma callback success."); | ||||
}; | }; | ||||
HcclResult hccl_ret = EnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); | |||||
HcclResult hccl_ret = HcomExecEnqueueRemoteAccess(context.GetNodeItem().NodeType(), addr_infos, callback); | |||||
if (hccl_ret != HCCL_SUCCESS) { | if (hccl_ret != HCCL_SUCCESS) { | ||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||||
GELOGE(HCCL_E_INTERNAL, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||||
return HCCL_E_INTERNAL; | return HCCL_E_INTERNAL; | ||||
} | } | ||||
@@ -307,32 +307,32 @@ Status HcclNodeExecutor::Initialize() { | |||||
GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); | GELOGE(GE_PLGMGR_SO_NOT_EXIST, "Failed in dlopen %s! ", dlerror()); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
auto HcomExcutorInitialize = (HcclResult(*)())dlsym(handle_, "HcomExcutorInitialize"); | |||||
if (HcomExcutorInitialize == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExcutorInitialize hcom unknown node function."); | |||||
auto HcomExecInitialize = (HcclResult(*)())dlsym(handle_, "HcomExecInitialize"); | |||||
if (HcomExecInitialize == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExecInitialize hcom unknown node function."); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
HcclResult hccl_ret = HcomExcutorInitialize(); | |||||
HcclResult hccl_ret = HcomExecInitialize(); | |||||
if (hccl_ret == HCCL_E_PTR) { | if (hccl_ret == HCCL_E_PTR) { | ||||
GELOGI("Hccl comm is null, hcom executor initialize is not required."); | GELOGI("Hccl comm is null, hcom executor initialize is not required."); | ||||
} else if (hccl_ret == HCCL_SUCCESS) { | } else if (hccl_ret == HCCL_SUCCESS) { | ||||
GELOGI("Hcom executor initialize success."); | GELOGI("Hcom executor initialize success."); | ||||
} else { | } else { | ||||
GELOGE(FAILED, "Call HcomExcutorInitialize failed, ret: 0x%X", hccl_ret); | |||||
GELOGE(FAILED, "Call HcomExecInitialize failed, ret: 0x%X", hccl_ret); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status HcclNodeExecutor::Finalize() { | Status HcclNodeExecutor::Finalize() { | ||||
auto HcomExcutorFinalize = (HcclResult(*)())dlsym(handle_, "HcomExcutorFinalize"); | |||||
if (HcomExcutorFinalize == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExcutorFinalize hcom unknown node function."); | |||||
auto HcomExecFinalize = (HcclResult(*)())dlsym(handle_, "HcomExecFinalize"); | |||||
if (HcomExecFinalize == nullptr) { | |||||
GELOGE(FAILED, "Failed to invoke HcomExecFinalize hcom unknown node function."); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
HcclResult hccl_ret = HcomExcutorFinalize(); | |||||
HcclResult hccl_ret = HcomExecFinalize(); | |||||
if (hccl_ret != HCCL_SUCCESS) { | if (hccl_ret != HCCL_SUCCESS) { | ||||
GELOGE(FAILED, "Call HcomExcutorFinalize failed, ret: 0x%X", hccl_ret); | |||||
GELOGE(FAILED, "Call HcomExecFinalize failed, ret: 0x%X", hccl_ret); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// dlclose file handle | // dlclose file handle | ||||
@@ -148,6 +148,10 @@ Status TaskContext::AllocateWorkspaces() { | |||||
} | } | ||||
Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const { | Status TaskContext::RegisterCallback(const std::function<void()> &callback_fun) const { | ||||
if (callback_fun == nullptr) { | |||||
GELOGW("[%s] Callback is NULL", GetNodeName()); | |||||
return SUCCESS; | |||||
} | |||||
auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); | auto ret = execution_context_->callback_manager->RegisterCallback(callback_fun); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); | GELOGE(ret, "[%s] Failed to register callback", GetNodeName()); | ||||
@@ -384,6 +388,20 @@ const char *TaskContext::GetNodeName() const { | |||||
return node_item_->NodeName().c_str(); | return node_item_->NodeName().c_str(); | ||||
} | } | ||||
void TaskContext::ReleaseInputsAndOutputs() { | |||||
for (int i = 0; i < node_item_->num_inputs; ++i) { | |||||
auto tensor = inputs_start_ + i; | |||||
tensor->Destroy(); | |||||
GELOGD("[%s] Tensor of input[%d] released", GetNodeName(), i); | |||||
} | |||||
for (int i = 0; i < node_item_->num_outputs; ++i) { | |||||
auto tensor = outputs_start_ + i; | |||||
tensor->Destroy(); | |||||
GELOGD("[%s] Tensor of output[%d] released", GetNodeName(), i); | |||||
} | |||||
} | |||||
void TaskContext::ReleaseInput(int index) { | void TaskContext::ReleaseInput(int index) { | ||||
auto input_tensor = MutableInput(index); | auto input_tensor = MutableInput(index); | ||||
if (input_tensor != nullptr) { | if (input_tensor != nullptr) { | ||||
@@ -456,5 +474,9 @@ Status TaskContext::TryExecuteCallback(const function<void()> &callback_fun) con | |||||
const DumpProperties &TaskContext::GetDumpProperties() const { | const DumpProperties &TaskContext::GetDumpProperties() const { | ||||
return execution_context_->dump_properties; | return execution_context_->dump_properties; | ||||
} | } | ||||
bool TaskContext::NeedCallback() { | |||||
return node_item_->has_observer || IsDumpEnabled() || execution_context_->profiling_level > 0; | |||||
} | |||||
} // namespace hybrid | } // namespace hybrid | ||||
} // namespace ge | } // namespace ge |
@@ -50,6 +50,8 @@ class TaskContext { | |||||
ConstGeTensorDescPtr GetOutputDesc(int index) const; | ConstGeTensorDescPtr GetOutputDesc(int index) const; | ||||
GeTensorDescPtr MutableInputDesc(int index) const; | GeTensorDescPtr MutableInputDesc(int index) const; | ||||
GeTensorDescPtr MutableOutputDesc(int index) const; | GeTensorDescPtr MutableOutputDesc(int index) const; | ||||
void ReleaseInputsAndOutputs(); | |||||
bool NeedCallback(); | |||||
void ReleaseInput(int index); | void ReleaseInput(int index); | ||||
const TensorValue *GetInput(int index) const; | const TensorValue *GetInput(int index) const; | ||||
const TensorValue *GetOutput(int index) const; | const TensorValue *GetOutput(int index) const; | ||||
@@ -63,6 +63,19 @@ vector<string> SplitInputShape(const std::string &input_shape) { | |||||
} | } | ||||
} // namespace | } // namespace | ||||
Status CheckInputFormat(const string &input_format) { | |||||
if (input_format.empty()) { | |||||
return ge::SUCCESS; | |||||
} | |||||
if (!ge::TypeUtils::IsFormatValid(input_format.c_str())) { | |||||
ErrorManager::GetInstance().ATCReportErrMessage( | |||||
"E10001", {"parameter", "value", "reason"}, {"--input_format", input_format, "input format is invalid!"}); | |||||
GELOGE(ge::PARAM_INVALID, "input format [%s] is invalid!", input_format.c_str()); | |||||
return ge::PARAM_INVALID; | |||||
} | |||||
return ge::SUCCESS; | |||||
} | |||||
bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | bool CheckDynamicBatchSizeInputShapeValid(unordered_map<string, vector<int64_t>> shape_map, | ||||
std::string &dynamic_batch_size) { | std::string &dynamic_batch_size) { | ||||
int32_t size = 0; | int32_t size = 0; | ||||
@@ -75,6 +75,7 @@ Status CheckInsertOpConfParamValid(const std::string insert_op_conf); | |||||
Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory); | Status CheckDisableReuseMemoryParamValid(const std::string disable_reuse_memory); | ||||
Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream); | Status CheckEnableSingleStreamParamValid(const std::string enable_single_stream); | ||||
Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); | Status CheckImplmodeParamValid(const std::string &optypelist_for_implmode, std::string &op_select_implmode); | ||||
Status CheckInputFormat(const string &input_format); | |||||
void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips); | void PrintOptionMap(std::map<std::string, std::string> &options, std::string tips); | ||||
void EraseEndSemicolon(std::string ¶m); | void EraseEndSemicolon(std::string ¶m); | ||||
} | } | ||||
@@ -227,7 +227,6 @@ class Impl { | |||||
~Impl() { (void)generator_.Finalize(); }; | ~Impl() { (void)generator_.Finalize(); }; | ||||
graphStatus CheckOptions(const std::map<std::string, std::string> &options); | graphStatus CheckOptions(const std::map<std::string, std::string> &options); | ||||
graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs); | graphStatus CreateInputsForIRBuild(const ge::Graph &graph, vector<ge::GeTensor> &inputs); | ||||
graphStatus GetDefaultInputShape(const Graph &graph, string &default_shape); | |||||
graphStatus UpdateDataOpAttr(const Graph &graph); | graphStatus UpdateDataOpAttr(const Graph &graph); | ||||
graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options); | graphStatus Init(const Graph &graph, const std::map<std::string, std::string> &options); | ||||
graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options, | graphStatus BuildModel(const Graph &graph, const std::map<std::string, std::string> &options, | ||||
@@ -318,42 +317,10 @@ graphStatus Impl::CheckOptions(const std::map<std::string, std::string> &options | |||||
if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { | if (it != options_.end() && (CheckDisableReuseMemoryParamValid(it->second) != GRAPH_SUCCESS)) { | ||||
return GRAPH_PARAM_INVALID; | return GRAPH_PARAM_INVALID; | ||||
} | } | ||||
return GRAPH_SUCCESS; | |||||
} | |||||
graphStatus Impl::GetDefaultInputShape(const Graph &graph, string &default_shape) { | |||||
auto compute_graph = ge::GraphUtils::GetComputeGraph(graph); | |||||
GE_CHECK_NOTNULL(compute_graph); | |||||
for (ge::NodePtr &input_node : compute_graph->GetDirectNode()) { | |||||
GE_CHECK_NOTNULL(input_node); | |||||
ge::OpDescPtr op = input_node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op); | |||||
if (op->GetType() == DATA) { | |||||
string data_op_name = op->GetName(); | |||||
GELOGD("Data op name: %s, data op inputDesc size: %zu", data_op_name.c_str(), op->GetAllInputsDesc().size()); | |||||
ge::GeTensorDesc tensor = op->GetInputDesc(0); | |||||
ge::GeShape data_shape = tensor.GetShape(); | |||||
GELOGD("Data op get shape from InputDesc in ge ir graph."); | |||||
string tmp_shape_str; | |||||
const std::vector<int64_t> &tmp_shape = data_shape.GetDims(); | |||||
if (tmp_shape.empty()) { | |||||
GELOGW("Data op: %s has zero shape dims!", data_op_name.c_str()); | |||||
} else { | |||||
tmp_shape_str += data_op_name + ":"; | |||||
for (auto tmp_dim : tmp_shape) { | |||||
tmp_shape_str += to_string((long)tmp_dim) + ","; | |||||
} | |||||
tmp_shape_str = tmp_shape_str.substr(0, tmp_shape_str.size() - 1); | |||||
tmp_shape_str += ";"; | |||||
default_shape += tmp_shape_str; | |||||
} | |||||
GELOGD("Data op name: %s, data shape: %s.", data_op_name.c_str(), tmp_shape_str.c_str()); | |||||
} | |||||
// Check Input Format | |||||
if (options_.find(kInputFormat) != options_.end()) { | |||||
return CheckInputFormat(options_[kInputFormat]); | |||||
} | } | ||||
default_shape = (default_shape.empty() ? default_shape : default_shape.substr(0, default_shape.size() - 1)); | |||||
GELOGI("Get default data op shape: %s from ge ir graph.", default_shape.c_str()); | |||||
return GRAPH_SUCCESS; | return GRAPH_SUCCESS; | ||||
} | } | ||||
@@ -378,13 +345,7 @@ graphStatus Impl::Init(const Graph &graph, const std::map<std::string, std::stri | |||||
GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); | GE_CHK_BOOL_RET_STATUS_NOLOG(ge::CheckLogParamValidAndSetLogLevel(log) == 0, GRAPH_PARAM_INVALID); | ||||
options_[ge::ir_option::LOG_LEVEL] = log; | options_[ge::ir_option::LOG_LEVEL] = log; | ||||
string input_shape; | |||||
if (options_.find("input_shape") == options_.end()) { | |||||
GE_CHK_BOOL_EXEC(GetDefaultInputShape(graph, input_shape) == ge::SUCCESS, | |||||
return ge::GRAPH_PARAM_INVALID, "Get default data op shape from graph failed!"); | |||||
} else { | |||||
input_shape = options_["input_shape"]; | |||||
} | |||||
string input_shape = options_.find("input_shape") == options_.end() ? "" : options_["input_shape"]; | |||||
string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; | string input_format = options_.find("input_format") == options_.end() ? "" : options_["input_format"]; | ||||
string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; | string net_format = options_.find("net_format") == options_.end() ? "" : options_["net_format"]; | ||||
string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() | string dynamic_batch_size = options_.find(ge::ir_option::DYNAMIC_BATCH_SIZE) == options_.end() | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||
@@ -39,7 +39,7 @@ size_t GetAlignedSize(size_t size) { | |||||
} | } | ||||
Status ProfilingTaskInfo(OpTask *op_task) { | Status ProfilingTaskInfo(OpTask *op_task) { | ||||
if (!ProfilingManager::Instance().ProfilingModelExecuteOn()) { | |||||
if (!ProfilingManager::Instance().ProfilingModelLoadOn()) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -112,8 +112,9 @@ Status OpTask::GetProfilingArgs(std::string &model_name, std::string &op_name, u | |||||
Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) { | ||||
return UNSUPPORTED; | return UNSUPPORTED; | ||||
} | } | ||||
Status OpTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param); | |||||
Status OpTask::DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace) { | |||||
auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, keep_workspace); | |||||
auto all_addresses = BuildTaskUtils::JoinAddresses(addresses); | auto all_addresses = BuildTaskUtils::JoinAddresses(addresses); | ||||
uintptr_t *arg_base = nullptr; | uintptr_t *arg_base = nullptr; | ||||
size_t arg_num = 0; | size_t arg_num = 0; | ||||
@@ -132,6 +133,10 @@ Status OpTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status OpTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
return DoUpdateArgTable(param, true); | |||||
} | |||||
Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | Status OpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc, | ||||
const vector<DataBuffer> &input_buffers, | const vector<DataBuffer> &input_buffers, | ||||
vector<GeTensorDesc> &output_desc, | vector<GeTensorDesc> &output_desc, | ||||
@@ -792,10 +797,9 @@ Status AiCpuTask::LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status AiCpuTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
auto addresses = BuildTaskUtils::GetAddresses(op_desc_, param, false); | |||||
io_addr_host_ = BuildTaskUtils::JoinAddresses(addresses); | |||||
return SUCCESS; | |||||
Status AiCpuBaseTask::UpdateArgTable(const SingleOpModelParam ¶m) { | |||||
// aicpu do not have workspace, for now | |||||
return DoUpdateArgTable(param, false); | |||||
} | } | ||||
void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | void AiCpuTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) { | ||||
@@ -54,6 +54,8 @@ class OpTask { | |||||
rtStream_t stream); | rtStream_t stream); | ||||
protected: | protected: | ||||
Status DoUpdateArgTable(const SingleOpModelParam ¶m, bool keep_workspace); | |||||
DumpProperties dump_properties_; | DumpProperties dump_properties_; | ||||
DumpOp dump_op_; | DumpOp dump_op_; | ||||
OpDescPtr op_desc_; | OpDescPtr op_desc_; | ||||
@@ -110,7 +112,7 @@ class AiCpuBaseTask : public OpTask { | |||||
AiCpuBaseTask() = default; | AiCpuBaseTask() = default; | ||||
~AiCpuBaseTask() override; | ~AiCpuBaseTask() override; | ||||
UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | UnknowShapeOpType GetUnknownType() const { return unknown_type_; } | ||||
Status UpdateArgTable(const SingleOpModelParam ¶m) override; | |||||
protected: | protected: | ||||
Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | Status UpdateIoAddr(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs); | ||||
Status SetInputConst(); | Status SetInputConst(); | ||||
@@ -137,7 +139,6 @@ class AiCpuTask : public AiCpuBaseTask { | |||||
~AiCpuTask() override; | ~AiCpuTask() override; | ||||
Status LaunchKernel(rtStream_t stream) override; | Status LaunchKernel(rtStream_t stream) override; | ||||
Status UpdateArgTable(const SingleOpModelParam ¶m) override; | |||||
void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; | void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override; | ||||
Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | Status LaunchKernel(const std::vector<GeTensorDesc> &input_desc, | ||||
@@ -293,6 +293,7 @@ const std::string MDL_BANK_PATH_FLAG = "ge.mdl_bank_path"; | |||||
// Configure op bank path | // Configure op bank path | ||||
const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path"; | const std::string OP_BANK_PATH_FLAG = "ge.op_bank_path"; | ||||
const std::string OP_BANK_UPDATE_FLAG = "ge.op_bank_update"; | |||||
// Graph run mode | // Graph run mode | ||||
enum GraphRunMode { PREDICTION = 0, TRAIN }; | enum GraphRunMode { PREDICTION = 0, TRAIN }; | ||||
@@ -366,6 +367,7 @@ static const char *const OP_COMPILER_CACHE_DIR = ge::OP_COMPILER_CACHE_DIR; | |||||
static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | static const char *const OP_COMPILER_CACHE_MODE = ge::OP_COMPILER_CACHE_MODE; | ||||
static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | static const char *const MDL_BANK_PATH = ge::MDL_BANK_PATH_FLAG.c_str(); | ||||
static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | static const char *const OP_BANK_PATH = ge::OP_BANK_PATH_FLAG.c_str(); | ||||
static const char *const OP_BANK_UPDATE = ge::OP_BANK_UPDATE_FLAG.c_str(); | |||||
static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | static const char *const OP_DEBUG_LEVEL = ge::OP_DEBUG_LEVEL.c_str(); | ||||
// for interface: aclgrphBuildModel | // for interface: aclgrphBuildModel | ||||
@@ -389,22 +391,13 @@ const std::set<std::string> ir_builder_suppported_options = {INPUT_FORMAT, | |||||
OP_COMPILER_CACHE_DIR, | OP_COMPILER_CACHE_DIR, | ||||
OP_COMPILER_CACHE_MODE, | OP_COMPILER_CACHE_MODE, | ||||
MDL_BANK_PATH, | MDL_BANK_PATH, | ||||
OP_BANK_PATH}; | |||||
OP_BANK_PATH, | |||||
OP_BANK_UPDATE}; | |||||
// for interface: aclgrphParse | // for interface: aclgrphParse | ||||
const std::set<std::string> ir_parser_suppported_options = {INPUT_FORMAT, | |||||
INPUT_SHAPE, | |||||
OP_NAME_MAP, | |||||
IS_DYNAMIC_INPUT, | |||||
INPUT_FP16_NODES, | |||||
IS_INPUT_ADJUST_HW_LAYOUT, | |||||
IS_OUTPUT_ADJUST_HW_LAYOUT, | |||||
OUTPUT, | |||||
OUTPUT_TYPE, | |||||
OUT_NODES, | |||||
COMPRESS_WEIGHT_CONF, | |||||
ENABLE_SCOPE_FUSION_PASSES, | |||||
LOG_LEVEL}; | |||||
const std::set<std::string> ir_parser_suppported_options = { | |||||
INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT, IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT, | |||||
OUT_NODES, COMPRESS_WEIGHT_CONF, ENABLE_SCOPE_FUSION_PASSES}; | |||||
// for interface: aclgrphBuildInitialize | // for interface: aclgrphBuildInitialize | ||||
const std::set<std::string> global_options = {CORE_TYPE, | const std::set<std::string> global_options = {CORE_TYPE, | ||||
@@ -37,7 +37,9 @@ enum FrameworkType { | |||||
MINDSPORE = 1, | MINDSPORE = 1, | ||||
TENSORFLOW = 3, | TENSORFLOW = 3, | ||||
ANDROID_NN, | ANDROID_NN, | ||||
#ifndef ONLY_COMPILE_OPEN_SRC | |||||
ONNX, | ONNX, | ||||
#endif | |||||
FRAMEWORK_RESERVED, | FRAMEWORK_RESERVED, | ||||
}; | }; | ||||
@@ -20,7 +20,8 @@ | |||||
#include "ge/ge_api_error_codes.h" | #include "ge/ge_api_error_codes.h" | ||||
#include "toolchain/prof_callback.h" | #include "toolchain/prof_callback.h" | ||||
#define MAX_DEV_NUM (64) | |||||
const int MAX_DEV_NUM = 64; | |||||
enum ProfCommandHandleType { | enum ProfCommandHandleType { | ||||
kProfCommandhandleInit = 0, | kProfCommandhandleInit = 0, | ||||
kProfCommandhandleStart, | kProfCommandhandleStart, | ||||
@@ -30,8 +30,6 @@ | |||||
#include "runtime/base.h" | #include "runtime/base.h" | ||||
namespace ge { | namespace ge { | ||||
class ModelListenerAdapter; | |||||
class SingleOp; | class SingleOp; | ||||
class DynamicSingleOp; | class DynamicSingleOp; | ||||
@@ -55,14 +53,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
ge::Status Initialize(); | ge::Status Initialize(); | ||||
ge::Status Finalize(); | ge::Status Finalize(); | ||||
// Load model | |||||
ge::Status LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key, int32_t priority, | |||||
std::shared_ptr<ge::ModelListener> listener); | |||||
ge::Status UnloadModel(uint32_t modelId); | ge::Status UnloadModel(uint32_t modelId); | ||||
ge::Status RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data); | |||||
// Get input and output descriptor | // Get input and output descriptor | ||||
ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false); | ||||
@@ -168,9 +160,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ge::Status GetModelDescInfoForZeroCopy(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc, | ||||
std::vector<ge::TensorDesc> &output_desc); | std::vector<ge::TensorDesc> &output_desc); | ||||
ge::Status LoadModel(uint32_t &model_id, const ge::ModelData &model_data, | |||||
std::shared_ptr<ge::ModelListener> listener); | |||||
ge::Status CommandHandle(const ge::Command &command); | ge::Status CommandHandle(const ge::Command &command); | ||||
ge::Status SetDump(const DumpConfig &dump_config); | ge::Status SetDump(const DumpConfig &dump_config); | ||||
@@ -297,8 +286,6 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor { | |||||
private: | private: | ||||
static bool isInit_; | static bool isInit_; | ||||
}; | }; | ||||
ge::Status ModelInfoParser(const ge::ModelData &model, ge::ModelInfo &model_info); | |||||
} // namespace ge | } // namespace ge | ||||
#endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ | #endif // INC_FRAMEWORK_EXECUTOR_GE_EXECUTOR_H_ |
@@ -36,7 +36,7 @@ using Status = domi::Status; | |||||
namespace domi { | namespace domi { | ||||
using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | using GetGraphCallback = std::function<std::unique_ptr<google::protobuf::Message>( | ||||
const google::protobuf::Message *root_proto, const std::string &graph)>; | |||||
const google::protobuf::Message *root_proto, const std::string &graph)>; | |||||
class ModelParser { | class ModelParser { | ||||
public: | public: | ||||
ModelParser() {} | ModelParser() {} | ||||
@@ -44,19 +44,20 @@ class ModelParser { | |||||
virtual ~ModelParser() {} | virtual ~ModelParser() {} | ||||
/** | /** | ||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] file Network model file path | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] file Network model file path | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status Parse(const char *file, ge::Graph &graph) = 0; | virtual Status Parse(const char *file, ge::Graph &graph) = 0; | ||||
/** | /** | ||||
* @ingroup domi_omg | * @ingroup domi_omg | ||||
* @brief Parse relevant data from memory and save it to graph | * @brief Parse relevant data from memory and save it to graph | ||||
* @param [in] input Model file memory data | * @param [in] input Model file memory data | ||||
* @param [in] input Model file memory size | |||||
* @param [in|out] graph A graph for saving the model information after analysis | * @param [in|out] graph A graph for saving the model information after analysis | ||||
* @return SUCCESS | * @return SUCCESS | ||||
* @return FAILED | * @return FAILED | ||||
@@ -64,36 +65,49 @@ class ModelParser { | |||||
*/ | */ | ||||
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | virtual Status ParseFromMemory(const char *data, uint32_t size, ge::ComputeGraphPtr &graph) = 0; | ||||
#ifndef ONLY_COMPILE_OPEN_SRC | |||||
/** | |||||
* @ingroup domi_omg | |||||
* @brief Parse relevant data from memory and save it to graph | |||||
* @param [in] input Model file memory data | |||||
* @param [in] input Model file memory size | |||||
* @param [in|out] graph A graph for saving the model information after analysis | |||||
* @return SUCCESS | |||||
* @return FAILED | |||||
* @author | |||||
*/ | |||||
virtual Status ParseFromMemory(const char *data, uint32_t size, ge::Graph &graph) = 0; | |||||
#endif | |||||
/** | /** | ||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] proto network model | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
* @ingroup domi_omg | |||||
* @brief Analyze network model data | |||||
* @param [in] proto network model | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | virtual Status ParseProto(const google::protobuf::Message *proto, ge::ComputeGraphPtr &graph) = 0; | ||||
/** | /** | ||||
* @ingroup domi_omg | |||||
* @brief Analyze callback model data in subgraph | |||||
* @param [in] proto network model | |||||
* @param [in] callback callback of subgraph | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, | |||||
GetGraphCallback callback, | |||||
* @ingroup domi_omg | |||||
* @brief Analyze callback model data in subgraph | |||||
* @param [in] proto network model | |||||
* @param [in] callback callback of subgraph | |||||
* @param [in|out] graph Save the network information after analysis | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ParseProtoWithSubgraph(const google::protobuf::Message *proto, GetGraphCallback callback, | |||||
ge::ComputeGraphPtr &graph) = 0; | ge::ComputeGraphPtr &graph) = 0; | ||||
/** | /** | ||||
* @ingroup domi_omg | |||||
* @brief Convert model files to JSON format | |||||
* @param [in] model_file Model file path to be converted | |||||
* @param [out] json_file Converted JSON file path | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
* @ingroup domi_omg | |||||
* @brief Convert model files to JSON format | |||||
* @param [in] model_file Model file path to be converted | |||||
* @param [out] json_file Converted JSON file path | |||||
* @return SUCCESS | |||||
* @return Others failed | |||||
*/ | |||||
virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | virtual Status ToJson(const char *model_file, const char *json_file) { return domi::SUCCESS; } | ||||
/* | /* | ||||
@@ -59,7 +59,7 @@ struct ParserContext { | |||||
bool train_flag = false; | bool train_flag = false; | ||||
domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | domi::domiTensorFormat_t format = domi::DOMI_TENSOR_ND; | ||||
domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | domi::FrameworkType type = domi::FRAMEWORK_RESERVED; | ||||
RunMode run_mode = ONLY_PRE_CHECK; | |||||
RunMode run_mode = GEN_OM_MODEL; | |||||
// save caffe custom proto path, used by caffe parse | // save caffe custom proto path, used by caffe parse | ||||
std::string custom_proto_path; | std::string custom_proto_path; | ||||
// save caffe proto path, used by caffe parse | // save caffe proto path, used by caffe parse | ||||
@@ -167,6 +167,7 @@ const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims"; | |||||
const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size"; | ||||
const std::string ATTR_NAME_ROOT_GRAPH_ID = "_root_graph_id"; | const std::string ATTR_NAME_ROOT_GRAPH_ID = "_root_graph_id"; | ||||
const std::string ATTR_NAME_ROOT_GRAPH_NAME = "_root_graph_name"; | |||||
// Identify node connecting to input and output | // Identify node connecting to input and output | ||||
const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data"; | const std::string ATTR_NAME_NODE_CONNECT_INPUT = "_is_connected_to_data"; | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||
@@ -118,8 +118,7 @@ const std::map<std::string, Format> kDataFormatMap = { | |||||
{"NCDHW", FORMAT_NCDHW}, | {"NCDHW", FORMAT_NCDHW}, | ||||
{"ND", FORMAT_ND}}; | {"ND", FORMAT_ND}}; | ||||
const std::map<std::string, Format> kStringToFormatMap = | |||||
{ | |||||
const std::map<std::string, Format> kStringToFormatMap = { | |||||
{"NCHW", FORMAT_NCHW}, | {"NCHW", FORMAT_NCHW}, | ||||
{"NHWC", FORMAT_NHWC}, | {"NHWC", FORMAT_NHWC}, | ||||
{"ND", FORMAT_ND}, | {"ND", FORMAT_ND}, | ||||
@@ -164,7 +163,7 @@ const std::map<std::string, Format> kStringToFormatMap = | |||||
{"NULL", FORMAT_NULL}, | {"NULL", FORMAT_NULL}, | ||||
// add for json input | // add for json input | ||||
{"RESERVED", FORMAT_RESERVED}, | {"RESERVED", FORMAT_RESERVED}, | ||||
{"UNDEFINED", FORMAT_RESERVED}, | |||||
{"UNDEFINED", FORMAT_RESERVED} | |||||
}; | }; | ||||
const std::map<DataType, std::string> kDataTypeToStringMap = { | const std::map<DataType, std::string> kDataTypeToStringMap = { | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||
@@ -19,12 +19,8 @@ | |||||
#include <map> | #include <map> | ||||
#include <string> | #include <string> | ||||
#include <vector> | |||||
#include "platform_info_def.h" | #include "platform_info_def.h" | ||||
using std::map; | |||||
using std::vector; | |||||
using std::string; | |||||
#include "platform_infos_def.h" | |||||
namespace fe { | namespace fe { | ||||
class PlatformInfoManager { | class PlatformInfoManager { | ||||
@@ -36,66 +32,143 @@ class PlatformInfoManager { | |||||
uint32_t InitializePlatformInfo(); | uint32_t InitializePlatformInfo(); | ||||
uint32_t Finalize(); | uint32_t Finalize(); | ||||
uint32_t GetPlatformInfo(const string SoCVersion, PlatformInfo &platform_info, OptionalInfo &opti_compilation_info); | |||||
uint32_t GetPlatformInfo(const std::string SoCVersion, | |||||
PlatformInfo &platform_info, | |||||
OptionalInfo &opti_compilation_info); | |||||
uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platform_info, OptionalInfo &opti_compilation_info); | uint32_t GetPlatformInfoWithOutSocVersion(PlatformInfo &platform_info, OptionalInfo &opti_compilation_info); | ||||
void SetOptionalCompilationInfo(OptionalInfo &opti_compilation_info); | void SetOptionalCompilationInfo(OptionalInfo &opti_compilation_info); | ||||
uint32_t GetPlatformInfos(const std::string SoCVersion, | |||||
PlatFormInfos &platform_info, | |||||
OptionalInfos &opti_compilation_info); | |||||
uint32_t GetPlatformInfoWithOutSocVersion(PlatFormInfos &platform_info, OptionalInfos &opti_compilation_info); | |||||
void SetOptionalCompilationInfo(OptionalInfos &opti_compilation_info); | |||||
private: | private: | ||||
PlatformInfoManager(); | PlatformInfoManager(); | ||||
~PlatformInfoManager(); | ~PlatformInfoManager(); | ||||
uint32_t LoadIniFile(string ini_file_real_path); | |||||
uint32_t LoadIniFile(std::string ini_file_real_path); | |||||
void Trim(string &str); | |||||
void Trim(std::string &str); | |||||
uint32_t LoadConfigFile(string real_path); | |||||
uint32_t LoadConfigFile(std::string real_path); | |||||
string RealPath(const std::string &path); | |||||
std::string RealPath(const std::string &path); | |||||
string GetSoFilePath(); | |||||
std::string GetSoFilePath(); | |||||
void ParseVersion(map<string, string> &version_map, string &soc_version, PlatformInfo &platform_info_temp); | |||||
void ParseVersion(std::map<std::string, std::string> &version_map, | |||||
std::string &soc_version, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseSocInfo(map<string, string> &soc_info_map, PlatformInfo &platform_info_temp); | |||||
void ParseSocInfo(std::map<std::string, std::string> &soc_info_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseCubeOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseCubeOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseBufferOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseBufferOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseUBOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseUBOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseUnzipOfAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseUnzipOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseAICoreSpec(map<string, string> &ai_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseBufferOfAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); | |||||
void ParseBufferOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); | |||||
void ParseAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseUBOfAICoreMemoryRates(map<string, string> &ai_core_memory_rates_map, PlatformInfo &platform_info_temp); | |||||
void ParseUBOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseAICoreintrinsicDtypeMap(map<string, string> &ai_coreintrinsic_dtype_map, PlatformInfo &platform_info_temp); | |||||
void ParseAICoreintrinsicDtypeMap(std::map<std::string, std::string> &ai_coreintrinsic_dtype_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseVectorCoreSpec(map<string, string> &vector_core_spec_map, PlatformInfo &platform_info_temp); | |||||
void ParseVectorCoreSpec(std::map<std::string, std::string> &vector_core_spec_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseVectorCoreMemoryRates(map<string, string> &vector_core_memory_rates_map, PlatformInfo &platform_info_temp); | |||||
void ParseVectorCoreMemoryRates(std::map<std::string, std::string> &vector_core_memory_rates_map, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseCPUCache(map<string, string> &CPUCacheMap, PlatformInfo &platform_info_temp); | |||||
void ParseCPUCache(std::map<std::string, std::string> &CPUCacheMap, | |||||
PlatformInfo &platform_info_temp); | |||||
void ParseVectorCoreintrinsicDtypeMap(map<string, string> &vector_coreintrinsic_dtype_map, | |||||
void ParseVectorCoreintrinsicDtypeMap(std::map<std::string, std::string> &vector_coreintrinsic_dtype_map, | |||||
PlatformInfo &platform_info_temp); | PlatformInfo &platform_info_temp); | ||||
uint32_t ParsePlatformInfoFromStrToStruct(map<string, map<string, string>> &content_info_map, string &soc_version, | |||||
uint32_t ParsePlatformInfoFromStrToStruct(std::map<std::string, std::map<std::string, std::string>> &content_info_map, | |||||
std::string &soc_version, | |||||
PlatformInfo &platform_info_temp); | PlatformInfo &platform_info_temp); | ||||
uint32_t AssemblePlatformInfoVector(map<string, map<string, string>> &content_info_map); | |||||
void ParseVersion(std::map<std::string, std::string> &version_map, | |||||
std::string &soc_version, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseSocInfo(std::map<std::string, std::string> &soc_info_map, PlatFormInfos &platform_info_temp); | |||||
void ParseCubeOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseBufferOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseUBOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseUnzipOfAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseAICoreSpec(std::map<std::string, std::string> &ai_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseBufferOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseUBOfAICoreMemoryRates(std::map<std::string, std::string> &ai_core_memory_rates_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseAICoreintrinsicDtypeMap(std::map<std::string, std::string> &ai_coreintrinsic_dtype_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseVectorCoreSpec(std::map<std::string, std::string> &vector_core_spec_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseVectorCoreMemoryRates(std::map<std::string, std::string> &vector_core_memory_rates_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseCPUCache(std::map<std::string, std::string> &CPUCacheMap, | |||||
PlatFormInfos &platform_info_temp); | |||||
void ParseVectorCoreintrinsicDtypeMap(std::map<std::string, std::string> &vector_coreintrinsic_dtype_map, | |||||
PlatFormInfos &platform_info_temp); | |||||
uint32_t ParsePlatformInfo(std::map<std::string, std::map<std::string, std::string>> &content_info_map, | |||||
std::string &soc_version, | |||||
PlatFormInfos &platform_info_temp); | |||||
uint32_t AssemblePlatformInfoVector(std::map<std::string, std::map<std::string, std::string>> &content_info_map); | |||||
private: | private: | ||||
bool init_flag_; | bool init_flag_; | ||||
map<string, PlatformInfo> platform_info_map_; | |||||
std::map<std::string, PlatformInfo> platform_info_map_; | |||||
OptionalInfo opti_compilation_info_; | OptionalInfo opti_compilation_info_; | ||||
std::map<std::string, PlatFormInfos> platform_infos_map_; | |||||
OptionalInfos opti_compilation_infos_; | |||||
}; | }; | ||||
} // namespace fe | } // namespace fe | ||||
#endif | #endif |
@@ -0,0 +1,283 @@ | |||||
/** | |||||
* Copyright 2019-2020 Huawei Technologies Co., Ltd | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
#ifndef PLATFORM_INFOS_DEF_H | |||||
#define PLATFORM_INFOS_DEF_H | |||||
#include <map> | |||||
#include <string> | |||||
#include <vector> | |||||
#include <memory> | |||||
#include "platform_info_def.h" | |||||
namespace fe { | |||||
// Forward declaration only: StrInfoImpl is defined outside this header.
class StrInfoImpl; | |||||
using StrInfoImplPtr = std::shared_ptr<StrInfoImpl>; | |||||
// Accessor facade for string-valued platform attributes (AIC version, CCEC
// AIC/AIV versions, AI CPU compiler support flag).
// The only data member is a shared_ptr to the opaque StrInfoImpl, and no
// copy/move operations are declared, so an implicit copy of a StrInfos shares
// the same StrInfoImpl object as the original.
// NOTE(review): what Init() does and what its bool result means are not visible
// here - presumably it creates the impl and returns true on success; confirm
// against the implementation before calling any accessor on a fresh object.
class StrInfos { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified, so they cannot be
// called through a const StrInfos.
std::string GetAIcVersion(); | |||||
std::string GetCcecAIcVersion(); | |||||
std::string GetCcecAIvVersion(); | |||||
// Despite the "Is..." name this returns a std::string, not a bool.
std::string IsSupportAICpuCompiler(); | |||||
// Setters take a non-const lvalue reference: callers must pass a named,
// non-const std::string (temporaries and string literals will not bind).
void SetAIcVersion(std::string &aic_version); | |||||
void SetCcecAIcVersion(std::string &ccec_aic_version); | |||||
void SetCcecAIvVersion(std::string &ccec_aiv_version); | |||||
void SetIsSupportAICpuCompiler(std::string &is_support_ai_cpu_compiler); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
StrInfoImplPtr str_info_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: SoCInfoImpl is defined outside this header.
class SoCInfoImpl; | |||||
using SoCInfoImplPtr = std::shared_ptr<SoCInfoImpl>; | |||||
// Accessor facade for SoC-level attributes: core counts, memory type/size and
// L2 type/size/page count. State lives behind a shared_ptr to the opaque
// SoCInfoImpl; implicit copies of a SoCInfos therefore share one impl object.
// NOTE(review): MemoryType and L2Type are not declared in this header -
// presumably they come from platform_info_def.h; confirm.
// NOTE(review): uint32_t/uint64_t are used but <cstdint> is not included here
// directly; the header relies on another include to provide them.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class SoCInfos { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
uint32_t GetAICoreCnt(); | |||||
uint32_t GetVectorCoreCnt(); | |||||
uint32_t GetAICpuCnt(); | |||||
MemoryType GetMemType(); | |||||
uint64_t GetMemSize(); | |||||
L2Type GetL2Type(); | |||||
uint64_t GetL2Size(); | |||||
uint32_t GetL2PageNum(); | |||||
// One setter per getter above; all arguments are passed by value.
// Units of the size fields are not stated in this header.
void SetAICoreCnt(uint32_t ai_core_cnt); | |||||
void SetVectorCoreCnt(uint32_t vector_core_cnt); | |||||
void SetAICpuCnt(uint32_t ai_cpu_cnt); | |||||
void SetMemType(MemoryType memory_type); | |||||
void SetMemSize(uint64_t memory_size); | |||||
void SetL2Type(L2Type l2_type); | |||||
void SetL2Size(uint64_t l2_size); | |||||
void SetL2PageNum(uint32_t l2_page_num); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
SoCInfoImplPtr soc_info_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: AICoreSpecImpl is defined outside this header.
class AICoreSpecImpl; | |||||
using AICoreSpecImplPtr = std::shared_ptr<AICoreSpecImpl>; | |||||
// Accessor facade for AI Core specification values: cube frequency and M/N/K
// sizes, vector calc size, L0a/L0b/L0c/L1 sizes, smask buffer, UB (size, block,
// bank, burst, bank-group) values, unzip settings and the cube/vector split
// flag. State lives behind a shared_ptr to the opaque AICoreSpecImpl; implicit
// copies of an AICoreSpecs therefore share one impl object.
// NOTE(review): units and valid ranges of these values are not stated in this
// header; do not assume bytes/MHz without checking the producer of the data.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class AICoreSpecs { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
double GetCubeFreq(); | |||||
uint64_t GetCubeMSize(); | |||||
uint64_t GetCubeNSize(); | |||||
uint64_t GetCubeKSize(); | |||||
uint64_t GetVecCalcSize(); | |||||
uint64_t GetL0aSize(); | |||||
uint64_t GetL0bSize(); | |||||
uint64_t GetL0cSize(); | |||||
uint64_t GetL1Size(); | |||||
uint64_t GetSmaskBuffer(); | |||||
uint64_t GetUBSize(); | |||||
uint64_t GetUBBlockSize(); | |||||
uint64_t GetUBBankSize(); | |||||
uint64_t GetUBBankNum(); | |||||
uint64_t GetUBBurstInOneBlock(); | |||||
uint64_t GetUBBankGroupNum(); | |||||
uint32_t GetUnzipEngines(); | |||||
uint32_t GetUnzipMaxRatios(); | |||||
uint32_t GetUnzipChannels(); | |||||
// The two values below are carried as uint8_t rather than bool.
uint8_t GetUnzipIsTight(); | |||||
uint8_t GetCubeVectorSplit(); | |||||
// One setter per getter above, with matching value types; all arguments are
// passed by value.
void SetCubeFreq(double cube_freq); | |||||
void SetCubeMSize(uint64_t cube_m_size); | |||||
void SetCubeNSize(uint64_t cube_n_size); | |||||
void SetCubeKSize(uint64_t cube_k_size); | |||||
void SetVecCalcSize(uint64_t vec_calc_size); | |||||
void SetL0aSize(uint64_t l0_a_size); | |||||
void SetL0bSize(uint64_t l0_b_size); | |||||
void SetL0cSize(uint64_t l0_c_size); | |||||
void SetL1Size(uint64_t l1_size); | |||||
void SetSmaskBuffer(uint64_t smask_buffer); | |||||
void SetUBSize(uint64_t ub_size); | |||||
void SetUBBlockSize(uint64_t ubblock_size); | |||||
void SetUBBankSize(uint64_t ubbank_size); | |||||
void SetUBBankNum(uint64_t ubbank_num); | |||||
void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block); | |||||
void SetUBBankGroupNum(uint64_t ubbank_group_num); | |||||
void SetUnzipEngines(uint32_t unzip_engines); | |||||
void SetUnzipMaxRatios(uint32_t unzip_max_ratios); | |||||
void SetUnzipChannels(uint32_t unzip_channels); | |||||
void SetUnzipIsTight(uint8_t unzip_is_tight); | |||||
void SetCubeVectorSplit(uint8_t cube_vector_split); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
AICoreSpecImplPtr aicore_spec_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: AICoreMemRateImpl is defined outside this header.
class AICoreMemRateImpl; | |||||
using AICoreMemRateImplPtr = std::shared_ptr<AICoreMemRateImpl>; | |||||
// Accessor facade for AI Core memory-rate values, all carried as double:
// DDR and L2 (combined/read/write) plus the L1->L0a, L1->L0b, L1->UB, L0c->UB,
// UB->L2, UB->DDR and UB->L1 transfer rates. State lives behind a shared_ptr to
// the opaque AICoreMemRateImpl; implicit copies share one impl object.
// NOTE(review): the unit of the rates is not stated in this header.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class AICoreMemRates { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
double GetDdrRate(); | |||||
double GetDdrReadRate(); | |||||
double GetDdrWriteRate(); | |||||
double GetL2Rate(); | |||||
double GetL2ReadRate(); | |||||
double GetL2WriteRate(); | |||||
double GetL1ToL0aRate(); | |||||
double GetL1ToL0bRate(); | |||||
double GetL1ToUBRate(); | |||||
double GetL0cToUBRate(); | |||||
double GetUBToL2Rate(); | |||||
double GetUBToDdrRate(); | |||||
double GetUBToL1Rate(); | |||||
// One setter per getter above; all arguments are passed by value.
void SetDdrRate(double ddr_rate); | |||||
void SetDdrReadRate(double ddr_read_rate); | |||||
void SetDdrWriteRate(double ddr_write_rate); | |||||
void SetL2Rate(double l2_rate); | |||||
void SetL2ReadRate(double l2_read_rate); | |||||
void SetL2WriteRate(double l2_write_rate); | |||||
void SetL1ToL0aRate(double l1_to_l0_a_rate); | |||||
void SetL1ToL0bRate(double l1_to_l0_b_rate); | |||||
void SetL1ToUBRate(double l1_to_ub_rate); | |||||
void SetL0cToUBRate(double l0_c_to_ub_rate); | |||||
void SetUBToL2Rate(double ub_to_l2_rate); | |||||
void SetUBToDdrRate(double ub_to_ddr_rate); | |||||
void SetUBToL1Rate(double ub_to_l1_rate); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
AICoreMemRateImplPtr aicore_mem_rate_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: VectorCoreSpecImpl is defined outside this header.
class VectorCoreSpecImpl; | |||||
using VectorCoreSpecImplPtr = std::shared_ptr<VectorCoreSpecImpl>; | |||||
// Accessor facade for Vector Core specification values: vector frequency, vec
// calc size, smask buffer, UB (size, block, bank, burst, bank-group) values and
// the vector/predicate/address/alignment register sizes. State lives behind a
// shared_ptr to the opaque VectorCoreSpecImpl; implicit copies share one impl
// object.
// NOTE(review): units and valid ranges are not stated in this header.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class VectorCoreSpecs { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
double GetVecFreq(); | |||||
uint64_t GetVecCalcSize(); | |||||
uint64_t GetSmaskBuffer(); | |||||
uint64_t GetUBSize(); | |||||
uint64_t GetUBBlockSize(); | |||||
uint64_t GetUBBankSize(); | |||||
uint64_t GetUBBankNum(); | |||||
uint64_t GetUBBurstInOneBlock(); | |||||
uint64_t GetUBBankGroupNum(); | |||||
uint64_t GetVectorRegSize(); | |||||
uint64_t GetPredicateRegSize(); | |||||
uint64_t GetAddressRegSize(); | |||||
uint64_t GetAlignmentRegSize(); | |||||
// One setter per getter above; all arguments are passed by value.
void SetVecFreq(double vec_freq); | |||||
void SetVecCalcSize(uint64_t vec_calc_size); | |||||
void SetSmaskBuffer(uint64_t smask_buffer); | |||||
void SetUBSize(uint64_t ub_size); | |||||
void SetUBBlockSize(uint64_t ubblock_size); | |||||
void SetUBBankSize(uint64_t ubbank_size); | |||||
void SetUBBankNum(uint64_t ubbank_num); | |||||
void SetUBBurstInOneBlock(uint64_t ubburst_in_one_block); | |||||
void SetUBBankGroupNum(uint64_t ubbank_group_num); | |||||
void SetVectorRegSize(uint64_t vector_reg_size); | |||||
void SetPredicateRegSize(uint64_t predicate_reg_size); | |||||
void SetAddressRegSize(uint64_t address_reg_size); | |||||
void SetAlignmentRegSize(uint64_t alignment_reg_size); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
VectorCoreSpecImplPtr vector_core_spec_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: VectorCoreMemRateImpl is defined outside this
// header.
class VectorCoreMemRateImpl; | |||||
using VectorCoreMemRateImplPtr = std::shared_ptr<VectorCoreMemRateImpl>; | |||||
// Accessor facade for Vector Core memory-rate values, all carried as double:
// DDR and L2 (combined/read/write) plus the UB->L2 and UB->DDR rates. State
// lives behind a shared_ptr to the opaque VectorCoreMemRateImpl; implicit
// copies share one impl object.
// NOTE(review): the unit of the rates is not stated in this header.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class VectorCoreMemRates { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
double GetDdrRate(); | |||||
double GetDdrReadRate(); | |||||
double GetDdrWriteRate(); | |||||
double GetL2Rate(); | |||||
double GetL2ReadRate(); | |||||
double GetL2WriteRate(); | |||||
double GetUBToL2Rate(); | |||||
double GetUBToDdrRate(); | |||||
// One setter per getter above; all arguments are passed by value.
void SetDdrRate(double ddr_rate); | |||||
void SetDdrReadRate(double ddr_read_rate); | |||||
void SetDdrWriteRate(double ddr_write_rate); | |||||
void SetL2Rate(double l2_rate); | |||||
void SetL2ReadRate(double l2_read_rate); | |||||
void SetL2WriteRate(double l2_write_rate); | |||||
void SetUBToL2Rate(double ub_to_l2_rate); | |||||
void SetUBToDdrRate(double ub_to_ddr_rate); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
VectorCoreMemRateImplPtr vector_core_mem_rate_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: CPUCacheImpl is defined outside this header.
class CPUCacheImpl; | |||||
using CPUCacheImplPtr = std::shared_ptr<CPUCacheImpl>; | |||||
// Accessor facade for two uint32_t CPU-cache settings, AICPUSyncBySW and
// TSCPUSyncBySW. State lives behind a shared_ptr to the opaque CPUCacheImpl;
// implicit copies of a CPUCaches share one impl object.
// NOTE(review): the meaning and valid values of the two settings are not
// stated in this header (presumably 0/1 flags - confirm).
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class CPUCaches { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
uint32_t GetAICPUSyncBySW(); | |||||
uint32_t GetTSCPUSyncBySW(); | |||||
// Setters take their argument by value.
void SetAICPUSyncBySW(uint32_t AICPUSyncBySW); | |||||
void SetTSCPUSyncBySW(uint32_t TSCPUSyncBySW); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
CPUCacheImplPtr cpu_cache_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: PlatFormInfosImpl is defined outside this header.
class PlatFormInfosImpl; | |||||
using PlatFormInfosImplPtr = std::shared_ptr<PlatFormInfosImpl>; | |||||
// Top-level aggregate of the per-area info objects declared in this header
// (StrInfos, SoCInfos, AICoreSpecs, AICoreMemRates, VectorCoreSpecs,
// VectorCoreMemRates, CPUCaches) plus two intrinsic-dtype maps. State lives
// behind a shared_ptr to the opaque PlatFormInfosImpl; implicit copies of a
// PlatFormInfos share one impl object.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class PlatFormInfos { | |||||
public: | |||||
bool Init(); | |||||
// Getters return the sub-objects and maps by value and are not
// const-qualified. Each returned info object itself only wraps a shared_ptr,
// so whether mutating the returned copy affects this object depends on the
// implementation - TODO confirm.
StrInfos GetStrInfo(); | |||||
SoCInfos GetSocInfo(); | |||||
AICoreSpecs GetAICoreSpec(); | |||||
AICoreMemRates GetAICoreMemRates(); | |||||
// Map schema is not documented here; presumably intrinsic name -> list of
// supported dtype names - confirm against the parser that fills it.
std::map<std::string, std::vector<std::string>> GetAICoreIntrinsicDtype(); | |||||
VectorCoreSpecs GetVectorCoreSpec(); | |||||
VectorCoreMemRates GetVectorCoreMemRates(); | |||||
CPUCaches GetCPUCache(); | |||||
std::map<std::string, std::vector<std::string>> GetVectorCoreIntrinsicDtype(); | |||||
// Setters take non-const lvalue references: callers must pass named,
// non-const objects (temporaries will not bind).
void SetStrInfo(StrInfos &str_infos); | |||||
void SetSocInfo(SoCInfos &SoC_infos); | |||||
void SetAICoreSpec(AICoreSpecs &AICore_specs); | |||||
void SetAICoreMemRates(AICoreMemRates &AICore_mem_rates); | |||||
void SetAICoreIntrinsicDtype(std::map<std::string, std::vector<std::string>> &intrinsic_dtypes); | |||||
void SetVectorCoreSpec(VectorCoreSpecs &vector_core_specs); | |||||
void SetVectorCoreMemRates(VectorCoreMemRates &vectorcore_mem_rates); | |||||
void SetCPUCache(CPUCaches &CPU_caches); | |||||
void SetVectorCoreIntrinsicDtype(std::map<std::string, std::vector<std::string>> &intrinsic_dtypes); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
PlatFormInfosImplPtr platform_infos_impl_{nullptr}; | |||||
}; | |||||
// Forward declaration only: OptionalInfosImpl is defined outside this header.
class OptionalInfosImpl; | |||||
using OptionalInfosImplPtr = std::shared_ptr<OptionalInfosImpl>; | |||||
// Accessor facade for optional compilation settings: SoC version, core type,
// AI Core count and L1 fusion flag. State lives behind a shared_ptr to the
// opaque OptionalInfosImpl; implicit copies of an OptionalInfos share one impl
// object.
// NOTE(review): the contract of Init() (and the meaning of its bool result) is
// not visible here; confirm against the implementation.
class OptionalInfos { | |||||
public: | |||||
bool Init(); | |||||
// Getters return by value and are not const-qualified.
std::string GetSocVersion(); | |||||
std::string GetCoreType(); | |||||
uint32_t GetAICoreNum(); | |||||
// The L1 fusion flag is carried as a std::string, not a bool; accepted values
// are not stated in this header.
std::string GetL1FusionFlag(); | |||||
// Setters take their arguments by value, so temporaries and string literals
// are accepted.
void SetSocVersion(std::string soc_version); | |||||
void SetCoreType(std::string core_type); | |||||
void SetAICoreNum(uint32_t ai_core_num); | |||||
void SetL1FusionFlag(std::string l1_fusion_flag); | |||||
private: | |||||
// Null until something assigns it (presumably Init() - TODO confirm).
OptionalInfosImplPtr optional_infos_impl_{nullptr}; | |||||
}; | |||||
} | |||||
#endif |
@@ -188,6 +188,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ROOT_GRAPH_ID; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ROOT_GRAPH_ID; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_ROOT_GRAPH_NAME; | |||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_INPUT; | ||||
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; | GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_NODE_CONNECT_OUTPUT; | ||||
@@ -15,6 +15,7 @@ message Output { | |||||
int32 original_output_data_type = 7; | int32 original_output_data_type = 7; | ||||
int32 original_output_format = 8; | int32 original_output_format = 8; | ||||
uint64 size = 9; | uint64 size = 9; | ||||
Shape origin_shape = 10; | |||||
} | } | ||||
message Input { | message Input { | ||||
@@ -23,6 +24,7 @@ message Input { | |||||
Shape shape = 3; | Shape shape = 3; | ||||
uint64 address = 4; | uint64 address = 4; | ||||
uint64 size = 5; | uint64 size = 5; | ||||
Shape origin_shape = 6; | |||||
} | } | ||||
enum BufferType { | enum BufferType { | ||||