From: @changzherui
Reviewed-by: @ljl0711, @guoqi1024
Signed-off-by: @guoqi1024
Tag: tags/v1.2.0
@@ -235,14 +235,14 @@ if [[ "X$ENABLE_GE_UT" = "Xon" || "X$ENABLE_GE_COV" = "Xon" ]]; then
 # fi
 # if [[ "X$ENABLE_GE_COV" = "Xon" ]]; then
-echo "Generating coverage statistics, please wait..."
-cd ${BASEPATH}
-rm -rf ${BASEPATH}/cov
-mkdir ${BASEPATH}/cov
-lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
-lcov --remove cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '/usr/local/*' -o cov/coverage.info
-cd ${BASEPATH}/cov
-genhtml coverage.info
+echo "Generating coverage statistics, please wait..."
+cd ${BASEPATH}
+rm -rf ${BASEPATH}/cov
+mkdir ${BASEPATH}/cov
+lcov -c -d build/tests/ut/ge -d build/tests/ut/common/graph/ -o cov/tmp.info
+lcov -r cov/tmp.info '*/output/*' '*/build/opensrc/*' '*/build/proto/*' '*/third_party/*' '*/tests/*' '*/ge/common/*' '*/ge/executor/*' '*/ge/graph/*' '*/ge/host_kernels/*' '/usr/local/*' -o cov/coverage.info
+cd ${BASEPATH}/cov
+genhtml coverage.info
 fi
 # generate output package in tar form, including ut/st libraries/executables
@@ -129,38 +129,38 @@ set(TRAIN_SRC_LIST
 "graph/label/partitioned_call_label_maker.cc"
 "graph/label/while_label_maker.cc"
 "graph/load/graph_loader.cc"
-"graph/load/new_model_manager/cpu_queue_schedule.cc"
-"graph/load/new_model_manager/data_dumper.cc"
-"graph/load/new_model_manager/data_inputer.cc"
-"graph/load/new_model_manager/davinci_model.cc"
-"graph/load/new_model_manager/davinci_model_parser.cc"
-"graph/load/new_model_manager/model_manager.cc"
-"graph/load/new_model_manager/model_utils.cc"
-"graph/load/new_model_manager/aipp_utils.cc"
-"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
-"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
-"graph/load/new_model_manager/task_info/event_record_task_info.cc"
-"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
-"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
-"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
-"graph/load/new_model_manager/task_info/hccl_task_info.cc"
-"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
-"graph/load/new_model_manager/task_info/kernel_task_info.cc"
-"graph/load/new_model_manager/task_info/label_set_task_info.cc"
-"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
-"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
-"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
-"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
-"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
-"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
-"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
-"graph/load/new_model_manager/task_info/task_info.cc"
-"graph/load/new_model_manager/tbe_handle_store.cc"
-"graph/load/new_model_manager/zero_copy_task.cc"
-"graph/load/new_model_manager/zero_copy_offset.cc"
+"graph/load/model_manager/cpu_queue_schedule.cc"
+"graph/load/model_manager/data_dumper.cc"
+"graph/load/model_manager/data_inputer.cc"
+"graph/load/model_manager/davinci_model.cc"
+"graph/load/model_manager/davinci_model_parser.cc"
+"graph/load/model_manager/model_manager.cc"
+"graph/load/model_manager/model_utils.cc"
+"graph/load/model_manager/aipp_utils.cc"
+"graph/load/model_manager/task_info/end_graph_task_info.cc"
+"graph/load/model_manager/task_info/model_exit_task_info.cc"
+"graph/load/model_manager/task_info/event_record_task_info.cc"
+"graph/load/model_manager/task_info/event_wait_task_info.cc"
+"graph/load/model_manager/task_info/fusion_start_task_info.cc"
+"graph/load/model_manager/task_info/fusion_stop_task_info.cc"
+"graph/load/model_manager/task_info/hccl_task_info.cc"
+"graph/load/model_manager/task_info/kernel_ex_task_info.cc"
+"graph/load/model_manager/task_info/kernel_task_info.cc"
+"graph/load/model_manager/task_info/label_set_task_info.cc"
+"graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
+"graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
+"graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
+"graph/load/model_manager/task_info/memcpy_async_task_info.cc"
+"graph/load/model_manager/task_info/profiler_trace_task_info.cc"
+"graph/load/model_manager/task_info/stream_active_task_info.cc"
+"graph/load/model_manager/task_info/stream_switch_task_info.cc"
+"graph/load/model_manager/task_info/stream_switchn_task_info.cc"
+"graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
+"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
+"graph/load/model_manager/task_info/task_info.cc"
+"graph/load/model_manager/tbe_handle_store.cc"
+"graph/load/model_manager/zero_copy_task.cc"
+"graph/load/model_manager/zero_copy_offset.cc"
 "graph/manager/graph_context.cc"
 "graph/manager/graph_manager.cc"
 "graph/manager/graph_manager_utils.cc"
@@ -375,6 +375,7 @@ set(TRAIN_SRC_LIST
 "hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
 "hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
 "hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+"hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
 "hybrid/node_executor/controlop/control_op_executor.cc"
 "hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
 "hybrid/node_executor/hccl/hccl_node_executor.cc"
@@ -605,37 +606,37 @@ set(INFER_SRC_LIST
 "graph/manager/util/rt_context_util.cc"
 "graph/manager/util/variable_accelerate_ctrl.cc"
 "graph/manager/util/debug.cc"
-"graph/load/new_model_manager/model_manager.cc"
-"graph/load/new_model_manager/data_inputer.cc"
-"graph/load/new_model_manager/davinci_model.cc"
-"graph/load/new_model_manager/davinci_model_parser.cc"
-"graph/load/new_model_manager/model_utils.cc"
-"graph/load/new_model_manager/aipp_utils.cc"
-"graph/load/new_model_manager/tbe_handle_store.cc"
-"graph/load/new_model_manager/cpu_queue_schedule.cc"
-"graph/load/new_model_manager/zero_copy_task.cc"
-"graph/load/new_model_manager/zero_copy_offset.cc"
-"graph/load/new_model_manager/data_dumper.cc"
-"graph/load/new_model_manager/task_info/task_info.cc"
-"graph/load/new_model_manager/task_info/event_record_task_info.cc"
-"graph/load/new_model_manager/task_info/event_wait_task_info.cc"
-"graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
-"graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
-"graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
-"graph/load/new_model_manager/task_info/kernel_task_info.cc"
-"graph/load/new_model_manager/task_info/label_set_task_info.cc"
-"graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
-"graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
-"graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
-"graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
-"graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_active_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
-"graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
-"graph/load/new_model_manager/task_info/end_graph_task_info.cc"
-"graph/load/new_model_manager/task_info/model_exit_task_info.cc"
-"graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
-"graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
+"graph/load/model_manager/model_manager.cc"
+"graph/load/model_manager/data_inputer.cc"
+"graph/load/model_manager/davinci_model.cc"
+"graph/load/model_manager/davinci_model_parser.cc"
+"graph/load/model_manager/model_utils.cc"
+"graph/load/model_manager/aipp_utils.cc"
+"graph/load/model_manager/tbe_handle_store.cc"
+"graph/load/model_manager/cpu_queue_schedule.cc"
+"graph/load/model_manager/zero_copy_task.cc"
+"graph/load/model_manager/zero_copy_offset.cc"
+"graph/load/model_manager/data_dumper.cc"
+"graph/load/model_manager/task_info/task_info.cc"
+"graph/load/model_manager/task_info/event_record_task_info.cc"
+"graph/load/model_manager/task_info/event_wait_task_info.cc"
+"graph/load/model_manager/task_info/fusion_start_task_info.cc"
+"graph/load/model_manager/task_info/fusion_stop_task_info.cc"
+"graph/load/model_manager/task_info/kernel_ex_task_info.cc"
+"graph/load/model_manager/task_info/kernel_task_info.cc"
+"graph/load/model_manager/task_info/label_set_task_info.cc"
+"graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
+"graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
+"graph/load/model_manager/task_info/memcpy_async_task_info.cc"
+"graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
+"graph/load/model_manager/task_info/profiler_trace_task_info.cc"
+"graph/load/model_manager/task_info/stream_active_task_info.cc"
+"graph/load/model_manager/task_info/stream_switch_task_info.cc"
+"graph/load/model_manager/task_info/stream_switchn_task_info.cc"
+"graph/load/model_manager/task_info/end_graph_task_info.cc"
+"graph/load/model_manager/task_info/model_exit_task_info.cc"
+"graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
+"graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
 "single_op/task/op_task.cc"
 "single_op/task/build_task_utils.cc"
 "single_op/task/tbe_task_builder.cc"
@@ -57,6 +57,7 @@ message TaskDef {
 LabelSetDef label_set = 37;
 LabelGotoExDef label_goto_ex = 38;
 LabelSwitchByIndexDef label_switch_by_index = 39;
+KernelDefWithHandle kernel_with_handle = 40;
 }
 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
 uint32 kernel_ext_info_size = 19;
 }
+message KernelDefWithHandle {
+KernelContext context = 1;
+uint64 handle = 10;
+string dev_func = 11;
+uint32 block_dim = 12;
+uint32 args_size = 13;
+bytes args = 14;
+bytes sm_desc = 15;
+string original_kernel_key = 16;
+string node_info = 17;
+}
 message KernelContext {
 uint32 kernel_type = 1;
 uint32 op_id = 2; // OP type in CCE
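The new KernelDefWithHandle message mirrors KernelDef but carries a registered binary handle and device function name. As a hypothetical illustration of how the field added at tag 40 could be populated through the generated C++ protobuf bindings (the domi package name and the include path are assumptions, not stated in this diff):

```cpp
#include <cstdint>
#include <string>
#include "proto/task.pb.h"  // assumed include path for the generated bindings

// Hypothetical helper: fills the field added at tag 40 of TaskDef.
void FillKernelWithHandle(domi::TaskDef &task_def, uint64_t handle,
                          const std::string &dev_func, uint32_t block_dim) {
  domi::KernelDefWithHandle *kernel = task_def.mutable_kernel_with_handle();
  kernel->set_handle(handle);        // uint64 handle = 10
  kernel->set_dev_func(dev_func);    // string dev_func = 11
  kernel->set_block_dim(block_dim);  // uint32 block_dim = 12
}
```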
@@ -62,7 +62,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
 while (size > size_1g) {
 write_count = mmWrite(fd, reinterpret_cast<void *>(seek), size_1g);
 if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
 return FAILED;
 }
 size -= size_1g;
@@ -75,7 +75,7 @@ Status FileSaver::WriteData(const void *data, uint32_t size, int32_t fd) {
 // -1: Failed to write to file; - 2: Illegal parameter
 if (write_count == EN_INVALID_PARAM || write_count == EN_ERROR) {
-GELOGE(FAILED, "Write data failed. mmpa_errorno = %d, %s", write_count, strerror(errno));
+GELOGE(FAILED, "Write data failed. mmpa_errorno = %ld, %s", write_count, strerror(errno));
 return FAILED;
 }
@@ -133,7 +133,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
 WriteData(static_cast<const void *>(&model_partition_table), table_size, fd) != SUCCESS, ret = FAILED; break);
 // Write partition data
 for (const auto &partitionData : partition_datas) {
-GELOGI("GC:size[%zu]", partitionData.size);
+GELOGI("GC:size[%u]", partitionData.size);
 GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
 WriteData(static_cast<const void *>(partitionData.data), partitionData.size, fd) != SUCCESS, ret = FAILED;
 break);
@@ -305,7 +305,7 @@ Status FileSaver::SaveWithFileHeader(const std::string &file_path, const ModelFi
 // Write partition data
 auto &cur_partition_datas = all_partition_datas[index];
 for (const auto &partition_data : cur_partition_datas) {
-GELOGI("GC:size[%zu]", partition_data.size);
+GELOGI("GC:size[%u]", partition_data.size);
 GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(
 WriteData(static_cast<const void *>(partition_data.data), partition_data.size, fd) != SUCCESS, ret = FAILED;
 break);
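Several of the hunks in this change only align printf-style format specifiers with the actual argument types (the fixed lines above log mmWrite's return value with %ld and the uint32_t partition sizes with %u). A small self-contained sketch of the convention these fixes follow; the variable names are placeholders:

```cpp
#include <cinttypes>
#include <cstdio>

int main() {
  int64_t write_count = -1;        // e.g. a byte count or error code from a write call
  uint32_t partition_size = 4096;  // matches the uint32_t partition sizes logged above
  size_t table_size = sizeof(int) * 8;

  // Match the specifier to the argument type: PRId64 for int64_t,
  // PRIu32 (or %u) for uint32_t, and %zu for size_t.
  std::printf("write_count = %" PRId64 "\n", write_count);
  std::printf("partition_size = %" PRIu32 "\n", partition_size);
  std::printf("table_size = %zu\n", table_size);
  return 0;
}
```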
@@ -32,7 +32,7 @@ int64_t GetCubeSizeByDataType(DataType data_type) {
 if (size <= 0) {
 std::string error = "Failed to get cube size, the data type " +
 FmtToStr(TypeUtils::DataTypeToSerialString(data_type)) + " is invalid";
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return -1;
 } else if (size == 1) {
 return kCubeSize * 2; // 32 bytes cube size
@@ -61,7 +61,7 @@ bool CheckShapeValid(const std::vector<int64_t> &shape, const int64_t expect_dim
 if (expect_dims <= 0 || shape.size() != static_cast<size_t>(expect_dims)) {
 std::string error = "Invalid shape, dims num " + FmtToStr(shape.size()) +
 ", expect " + FmtToStr(expect_dims);
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return false;
 }
 return IsShapeValid(shape);
@@ -75,12 +75,12 @@ bool IsShapeValid(const std::vector<int64_t> &shape) {
 for (auto dim : shape) {
 if (dim < 0) {
 std::string error = "Invalid negative dims in the shape " + FmtToStr(ShapeToString(shape));
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return false;
 }
 if (dim != 0 && kShapeItemNumMAX / dim < num) {
 std::string error = "Shape overflow, the total count should be less than " + FmtToStr(kShapeItemNumMAX);
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return false;
 }
 num *= dim;
@@ -108,7 +108,7 @@ bool IsTransShapeSrcCorrect(const TransArgs &args, std::vector<int64_t> &expect_
 FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", invalid relationship between src shape " +
 FmtToStr(ShapeToString(args.src_shape)) + " and dst " +
 FmtToStr(ShapeToString(args.dst_shape));
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return false;
 }
 return true;
@@ -121,7 +121,7 @@ bool IsTransShapeDstCorrect(const TransArgs &args, std::vector<int64_t> &expect_
 FmtToStr(TypeUtils::FormatToSerialString(args.dst_format)) + ", the dst shape" +
 FmtToStr(ShapeToString(args.dst_shape)) + " is invalid, expect" +
 FmtToStr(ShapeToString(expect_shape));
-GE_ERRORLOG_AND_ERRORMSG(PARAM_INVALID, error.c_str());
+GE_WARNINGLOG_AND_ERRORMSG(error.c_str());
 return false;
 }
 return true;
@@ -28,7 +28,7 @@
 #include "framework/common/util.h"
 #include "graph/detail/attributes_holder.h"
 #include "graph/detail/model_serialize_imp.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/tensor_utils.h"
@@ -1000,8 +1000,8 @@ Status ModelCacheHelper::RecoverVarAddrAndTensorDesc(const Json &json) const {
 auto offset = (tensor_addr_mgr.offset);
 // Check logic address and offset
 if (logic_address - offset != VarManager::Instance(session_id_)->GetVarMemLogicBase()) {
-GELOGW("Check logic_address[%u] and offset [%u] of %s failed, var mem logic base is %u, abandon", logic_address,
-offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
+GELOGW("Check logic_address[%lu] and offset [%lu] of %s failed, var mem logic base is %lu, abandon",
+logic_address, offset, iter.first.c_str(), VarManager::Instance(session_id_)->GetVarMemLogicBase());
 return PARAM_INVALID;
 }
 // Offset is needed by SaveVarVddr instead of logic address
@@ -23,7 +23,7 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/version.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/graph_utils.h"
@@ -537,7 +537,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadRootMod
 //model verison 1.0 file header does not have model_num member
 is_unknown_shape_model_ = file_header_->version >= ge::MODEL_VERSION &&
 file_header_->model_num > kStatiOmFileModelNum;
-GELOGD("cur om model is ge root model or no %d, model version %zu", is_unknown_shape_model_, file_header_->version);
+GELOGD("cur om model is ge root model or no %d, model version %u", is_unknown_shape_model_, file_header_->version);
 OmFileLoadHelper om_load_helper;
 if (is_unknown_shape_model_) {
@@ -746,7 +746,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::LoadTask(Om
 GELOGE(INTERNAL_ERROR, "ReadProtoFromArray failed.");
 return INTERNAL_ERROR;
 }
-GELOGD("TASK_INFO op_size:%zu, stream_num:%u", task->op().size(), task->stream_num());
+GELOGD("TASK_INFO op_size:%d, stream_num:%u", task->op().size(), task->stream_num());
 }
 cur_model->SetModelTaskDef(task);
 return SUCCESS;
@@ -203,7 +203,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
 auto partition_table = reinterpret_cast<ModelPartitionTable *>(model_data + cur_offset);
 size_t partition_table_size = SIZE_OF_MODEL_PARTITION_TABLE(*partition_table);
 cur_offset += partition_table_size;
-GELOGD("Cur model index %zu: ModelPartitionTable num :%u, "
+GELOGD("Cur model index %u: ModelPartitionTable num :%u, "
 "ModelFileHeader length :%zu, ModelPartitionTable length :%zu",
 index, partition_table->num, sizeof(ModelFileHeader), partition_table_size);
 if (model_data_size <= cur_offset) {
@@ -219,7 +219,7 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
 partition.type = partition_table->partition[i].type;
 if (index >= model_contexts_.size()) {
 if (index != model_contexts_.size()) {
-GELOGE(FAILED, "cur index is %zu make model_contexts_ overflow", index);
+GELOGE(FAILED, "cur index is %u make model_contexts_ overflow", index);
 return FAILED;
 }
@@ -231,16 +231,16 @@ Status OmFileLoadHelper::LoadModelPartitionTable(uint8_t *model_data, uint32_t m
 }
 if (partition.size > model_data_size || cur_offset > model_data_size - partition.size) {
-GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %zu is greater than the model data size %u.",
+GELOGE(GE_EXEC_MODEL_DATA_SIZE_INVALID, "The partition size %u is greater than the model data size %u.",
 partition.size + cur_offset, model_data_size);
 return GE_EXEC_MODEL_DATA_SIZE_INVALID;
 }
 cur_offset += partition.size;
-GELOGD("Partition, type:%d, size:%u, model_index:%zu", static_cast<int>(partition.type), partition.size, index);
+GELOGD("Partition, type:%d, size:%u, model_index:%u", static_cast<int>(partition.type), partition.size, index);
 }
 }
 if (cur_offset != model_data_size) {
-GELOGE(FAILED, "do not get the complete model, read end offset:%zu, all size:%zu", cur_offset, model_data_size);
+GELOGE(FAILED, "do not get the complete model, read end offset:%u, all size:%u", cur_offset, model_data_size);
 return FAILED;
 }
 return SUCCESS;
@@ -51,7 +51,7 @@ bool KernelStore::Build() {
 kernel_head.name_len = static_cast<uint32_t>(kernel->GetName().length());
 kernel_head.bin_len = static_cast<uint32_t>(kernel->GetBinDataSize());
-GELOGD("get kernel bin name %s, addr %p, size %u",
+GELOGD("get kernel bin name %s, addr %p, size %zu",
 kernel->GetName().c_str(), kernel->GetBinData(), kernel->GetBinDataSize());
 mem_ret = memcpy_s(next_buffer, remain_len, &kernel_head, sizeof(kernel_head));
 GE_CHK_BOOL_EXEC_NOLOG(mem_ret == EOK, return false);
@@ -878,11 +878,11 @@ inline Status CheckInt32DivOverflow(int32_t a, int32_t b) {
 return INTERNAL_ERROR; \
 }
-#define FMK_INT64_UINT32_MULCHECK(a, b) \
-if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \
-GELOGW("Int64 %ld and UINT32 %u multiplication can result in overflow!", static_cast<uint32_t>(a), \
-static_cast<uint32_t>(b)); \
-return INTERNAL_ERROR; \
+#define FMK_INT64_UINT32_MULCHECK(a, b) \
+if (ge::CheckInt64Uint32MulOverflow((a), (b)) != SUCCESS) { \
+GELOGW("Int64 %ld and Uint32 %u multiplication can result in overflow!", static_cast<int64_t>(a), \
+static_cast<uint32_t>(b)); \
+return INTERNAL_ERROR; \
 }
 #define FMK_FP16_ZEROCHECK(a) \
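The macro above guards a 64-bit by 32-bit multiplication; the fix only corrects the cast used when logging the first operand, so an int64_t value is no longer printed through a uint32_t. A standalone sketch of what such an overflow check can look like (my own illustration, not GE's CheckInt64Uint32MulOverflow implementation):

```cpp
#include <cstdint>
#include <limits>

// Returns true when a * b would overflow int64_t. Illustrative only; the real
// check used by the macro lives in GE's math utilities.
bool Int64Uint32MulOverflows(int64_t a, uint32_t b) {
  if (a == 0 || b == 0) {
    return false;
  }
  const int64_t max = std::numeric_limits<int64_t>::max();
  const int64_t min = std::numeric_limits<int64_t>::min();
  if (a > 0) {
    return a > max / static_cast<int64_t>(b);
  }
  return a < min / static_cast<int64_t>(b);
}
```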
@@ -21,7 +21,7 @@
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace {
 const char *const kTrainingTrace = "training_trace";
@@ -218,6 +218,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 uint32_t stream_id = task.stream_id;
 std::string shape_type = task.shape_type;
 int64_t cur_iter_num = task.cur_iter_num;
+uint32_t task_type = task.task_type;
 data = model_name.append(" ")
 .append(op_name).append(" ")
 .append(std::to_string(block_dim)).append(" ")
@@ -225,7 +226,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
 .append(std::to_string(stream_id)).append(" ")
 .append(std::to_string(model_id)).append(" ")
 .append(shape_type).append(" ")
-.append(std::to_string(cur_iter_num)).append("\n");
+.append(std::to_string(cur_iter_num)).append(" ")
+.append(std::to_string(task_type)).append("\n");
 ReporterData reporter_data{};
 reporter_data.deviceId = device_id;
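With this change each per-task profiling record gains a trailing task_type column. A hypothetical sketch of the space-separated record implied by the append chain above; the exact set of columns between block_dim and stream_id is not visible in this hunk, so the signature below is an assumption:

```cpp
#include <cstdint>
#include <string>

// Illustration only: builds a record in the same style as the append chain
// above, with task_type as the new final column before the newline.
std::string BuildTaskRecord(const std::string &model_name, const std::string &op_name,
                            uint32_t block_dim, uint32_t stream_id, uint32_t model_id,
                            const std::string &shape_type, int64_t cur_iter_num,
                            uint32_t task_type) {
  std::string data = model_name;
  data.append(" ").append(op_name)
      .append(" ").append(std::to_string(block_dim))
      .append(" ").append(std::to_string(stream_id))
      .append(" ").append(std::to_string(model_id))
      .append(" ").append(shape_type)
      .append(" ").append(std::to_string(cur_iter_num))
      .append(" ").append(std::to_string(task_type))  // new trailing column
      .append("\n");
  return data;
}
```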
@@ -57,6 +57,7 @@ message TaskDef {
 LabelSetDef label_set = 37;
 LabelGotoExDef label_goto_ex = 38;
 LabelSwitchByIndexDef label_switch_by_index = 39;
+KernelDefWithHandle kernel_with_handle = 40;
 }
 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
 uint32 kernel_ext_info_size = 19;
 }
+message KernelDefWithHandle {
+KernelContext context = 1;
+uint64 handle = 10;
+string dev_func = 11;
+uint32 block_dim = 12;
+uint32 args_size = 13;
+bytes args = 14;
+bytes sm_desc = 15;
+string original_kernel_key = 16;
+string node_info = 17;
+}
 message KernelContext {
 uint32 kernel_type = 1;
 uint32 op_id = 2; // OP type in CCE
@@ -388,6 +388,7 @@ REGISTER_OPTYPE_DEFINE(HCOMRECEIVE, "HcomReceive");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEREAD, "HcomRemoteRead");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEREFREAD, "HcomRemoteRefRead");
 REGISTER_OPTYPE_DEFINE(HCOMREMOTEWRITE, "HcomRemoteWrite");
+REGISTER_OPTYPE_DEFINE(HCOMREMOTESCATTERWRITE, "HcomRemoteScatterWrite");
 REGISTER_OPTYPE_DEFINE(VARASSIGN, "VarAssign");
 REGISTER_OPTYPE_DEFINE(VARISINITIALIZEDOP, "VarIsInitializedOp");
@@ -32,37 +32,37 @@ set(SRC_LIST
 "../hybrid/node_executor/aicpu/aicpu_ext_info.cc"
 "../model/ge_model.cc"
 "../model/ge_root_model.cc"
-"../graph/load/new_model_manager/davinci_model.cc"
-"../graph/load/new_model_manager/davinci_model_parser.cc"
-"../graph/load/new_model_manager/model_manager.cc"
-"../graph/load/new_model_manager/tbe_handle_store.cc"
-"../graph/load/new_model_manager/cpu_queue_schedule.cc"
-"../graph/load/new_model_manager/model_utils.cc"
-"../graph/load/new_model_manager/aipp_utils.cc"
-"../graph/load/new_model_manager/data_inputer.cc"
-"../graph/load/new_model_manager/data_dumper.cc"
-"../graph/load/new_model_manager/zero_copy_task.cc"
-"../graph/load/new_model_manager/zero_copy_offset.cc"
-"../graph/load/new_model_manager/task_info/task_info.cc"
-"../graph/load/new_model_manager/task_info/event_record_task_info.cc"
-"../graph/load/new_model_manager/task_info/event_wait_task_info.cc"
-"../graph/load/new_model_manager/task_info/fusion_start_task_info.cc"
-"../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc"
-"../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc"
-"../graph/load/new_model_manager/task_info/kernel_task_info.cc"
-"../graph/load/new_model_manager/task_info/label_set_task_info.cc"
-"../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc"
-"../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc"
-"../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc"
-"../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc"
-"../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc"
-"../graph/load/new_model_manager/task_info/stream_active_task_info.cc"
-"../graph/load/new_model_manager/task_info/stream_switch_task_info.cc"
-"../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc"
-"../graph/load/new_model_manager/task_info/end_graph_task_info.cc"
-"../graph/load/new_model_manager/task_info/model_exit_task_info.cc"
-"../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc"
-"../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc"
+"../graph/load/model_manager/davinci_model.cc"
+"../graph/load/model_manager/davinci_model_parser.cc"
+"../graph/load/model_manager/model_manager.cc"
+"../graph/load/model_manager/tbe_handle_store.cc"
+"../graph/load/model_manager/cpu_queue_schedule.cc"
+"../graph/load/model_manager/model_utils.cc"
+"../graph/load/model_manager/aipp_utils.cc"
+"../graph/load/model_manager/data_inputer.cc"
+"../graph/load/model_manager/data_dumper.cc"
+"../graph/load/model_manager/zero_copy_task.cc"
+"../graph/load/model_manager/zero_copy_offset.cc"
+"../graph/load/model_manager/task_info/task_info.cc"
+"../graph/load/model_manager/task_info/event_record_task_info.cc"
+"../graph/load/model_manager/task_info/event_wait_task_info.cc"
+"../graph/load/model_manager/task_info/fusion_start_task_info.cc"
+"../graph/load/model_manager/task_info/fusion_stop_task_info.cc"
+"../graph/load/model_manager/task_info/kernel_ex_task_info.cc"
+"../graph/load/model_manager/task_info/kernel_task_info.cc"
+"../graph/load/model_manager/task_info/label_set_task_info.cc"
+"../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc"
+"../graph/load/model_manager/task_info/label_goto_ex_task_info.cc"
+"../graph/load/model_manager/task_info/memcpy_async_task_info.cc"
+"../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc"
+"../graph/load/model_manager/task_info/profiler_trace_task_info.cc"
+"../graph/load/model_manager/task_info/stream_active_task_info.cc"
+"../graph/load/model_manager/task_info/stream_switch_task_info.cc"
+"../graph/load/model_manager/task_info/stream_switchn_task_info.cc"
+"../graph/load/model_manager/task_info/end_graph_task_info.cc"
+"../graph/load/model_manager/task_info/model_exit_task_info.cc"
+"../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc"
+"../graph/load/model_manager/task_info/super_kernel/super_kernel.cc"
 "../graph/common/local_context.cc"
 "../opskernel_manager/ops_kernel_builder_manager.cc"
 "../single_op/single_op_manager.cc"
@@ -104,6 +104,7 @@ set(SRC_LIST
 "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
 "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
 "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+"../hybrid/node_executor/host_cpu/kernel/data_kernel.cc"
 "../hybrid/node_executor/controlop/control_op_executor.cc"
 "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
 "../hybrid/node_executor/rts/rts_node_executor.cc"
@@ -29,15 +29,15 @@
 #include "framework/common/util.h"
 #include "graph/execute/graph_execute.h"
 #include "graph/load/graph_loader.h"
-#include "graph/load/new_model_manager/davinci_model_parser.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/davinci_model_parser.h"
+#include "graph/load/model_manager/model_manager.h"
 #include "graph/manager/graph_mem_allocator.h"
 #include "graph/model.h"
 #include "graph/utils/graph_utils.h"
 #include "mmpa/mmpa_api.h"
 #include "single_op/single_op_manager.h"
 #include "graph/manager/graph_var_manager.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
 using std::string;
@@ -454,7 +454,7 @@ Status GeExecutor::GetCurDynamicDims(uint32_t model_id, const vector<uint64_t> &
 if (all_data_dims[i] < 0) {
 cur_dynamic_dims.push_back(dynamic_dims[i]);
 } else if (static_cast<uint64_t>(all_data_dims[i]) != dynamic_dims[i]) {
-GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %d should be %d",
+GELOGE(ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID, "Static dims should be same, index: %zu value: %lu should be %ld",
 i, dynamic_dims[i], all_data_dims[i]);
 return ACL_ERROR_GE_DYNAMIC_INPUT_LENGTH_INVALID;
 }
@@ -22,37 +22,37 @@ local_ge_executor_src_files := \
 ../graph/manager/util/debug.cc \
 ../model/ge_model.cc \
 ../model/ge_root_model.cc \
-../graph/load/new_model_manager/davinci_model.cc \
-../graph/load/new_model_manager/davinci_model_parser.cc \
-../graph/load/new_model_manager/model_manager.cc \
-../graph/load/new_model_manager/tbe_handle_store.cc \
-../graph/load/new_model_manager/cpu_queue_schedule.cc \
-../graph/load/new_model_manager/model_utils.cc \
-../graph/load/new_model_manager/aipp_utils.cc \
-../graph/load/new_model_manager/data_inputer.cc \
-../graph/load/new_model_manager/data_dumper.cc \
-../graph/load/new_model_manager/zero_copy_task.cc \
-../graph/load/new_model_manager/zero_copy_offset.cc \
-../graph/load/new_model_manager/task_info/task_info.cc \
-../graph/load/new_model_manager/task_info/event_record_task_info.cc \
-../graph/load/new_model_manager/task_info/event_wait_task_info.cc \
-../graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
-../graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
-../graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
-../graph/load/new_model_manager/task_info/kernel_task_info.cc \
-../graph/load/new_model_manager/task_info/label_set_task_info.cc \
-../graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
-../graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
-../graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
-../graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
-../graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
-../graph/load/new_model_manager/task_info/stream_active_task_info.cc \
-../graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
-../graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
-../graph/load/new_model_manager/task_info/end_graph_task_info.cc \
-../graph/load/new_model_manager/task_info/model_exit_task_info.cc \
-../graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
-../graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
+../graph/load/model_manager/davinci_model.cc \
+../graph/load/model_manager/davinci_model_parser.cc \
+../graph/load/model_manager/model_manager.cc \
+../graph/load/model_manager/tbe_handle_store.cc \
+../graph/load/model_manager/cpu_queue_schedule.cc \
+../graph/load/model_manager/model_utils.cc \
+../graph/load/model_manager/aipp_utils.cc \
+../graph/load/model_manager/data_inputer.cc \
+../graph/load/model_manager/data_dumper.cc \
+../graph/load/model_manager/zero_copy_task.cc \
+../graph/load/model_manager/zero_copy_offset.cc \
+../graph/load/model_manager/task_info/task_info.cc \
+../graph/load/model_manager/task_info/event_record_task_info.cc \
+../graph/load/model_manager/task_info/event_wait_task_info.cc \
+../graph/load/model_manager/task_info/fusion_start_task_info.cc \
+../graph/load/model_manager/task_info/fusion_stop_task_info.cc \
+../graph/load/model_manager/task_info/kernel_ex_task_info.cc \
+../graph/load/model_manager/task_info/kernel_task_info.cc \
+../graph/load/model_manager/task_info/label_set_task_info.cc \
+../graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
+../graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
+../graph/load/model_manager/task_info/memcpy_async_task_info.cc \
+../graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
+../graph/load/model_manager/task_info/profiler_trace_task_info.cc \
+../graph/load/model_manager/task_info/stream_active_task_info.cc \
+../graph/load/model_manager/task_info/stream_switch_task_info.cc \
+../graph/load/model_manager/task_info/stream_switchn_task_info.cc \
+../graph/load/model_manager/task_info/end_graph_task_info.cc \
+../graph/load/model_manager/task_info/model_exit_task_info.cc \
+../graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \
+../graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
 ../opskernel_manager/ops_kernel_builder_manager.cc \
 ../single_op/single_op_manager.cc \
 ../single_op/single_op_model.cc \
@@ -95,6 +95,7 @@ local_ge_executor_src_files := \
 ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \
 ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \
 ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \
+../hybrid/node_executor/host_cpu/kernel/data_kernel.cc \
 ../hybrid/node_executor/controlop/control_op_executor.cc \
 ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
 ../hybrid/node_executor/rts/rts_node_executor.cc \
@@ -57,6 +57,7 @@ message TaskDef {
 LabelSetDef label_set = 37;
 LabelGotoExDef label_goto_ex = 38;
 LabelSwitchByIndexDef label_switch_by_index = 39;
+KernelDefWithHandle kernel_with_handle = 40;
 }
 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
 uint32 kernel_ext_info_size = 19;
 }
+message KernelDefWithHandle {
+KernelContext context = 1;
+uint64 handle = 10;
+string dev_func = 11;
+uint32 block_dim = 12;
+uint32 args_size = 13;
+bytes args = 14;
+bytes sm_desc = 15;
+string original_kernel_key = 16;
+string node_info = 17;
+}
 message KernelContext {
 uint32 kernel_type = 1;
 uint32 op_id = 2; // OP type in CCE
@@ -228,37 +228,37 @@ OME_HOST_SRC_FILES := \
 graph/manager/util/rt_context_util.cc \
 graph/manager/util/variable_accelerate_ctrl.cc \
 graph/manager/util/debug.cc \
-graph/load/new_model_manager/model_manager.cc \
-graph/load/new_model_manager/data_inputer.cc \
-graph/load/new_model_manager/davinci_model.cc \
-graph/load/new_model_manager/davinci_model_parser.cc \
-graph/load/new_model_manager/model_utils.cc \
-graph/load/new_model_manager/aipp_utils.cc \
-graph/load/new_model_manager/tbe_handle_store.cc \
-graph/load/new_model_manager/cpu_queue_schedule.cc \
-graph/load/new_model_manager/zero_copy_task.cc \
-graph/load/new_model_manager/zero_copy_offset.cc \
-graph/load/new_model_manager/data_dumper.cc \
-graph/load/new_model_manager/task_info/task_info.cc \
-graph/load/new_model_manager/task_info/event_record_task_info.cc \
-graph/load/new_model_manager/task_info/event_wait_task_info.cc \
-graph/load/new_model_manager/task_info/fusion_start_task_info.cc \
-graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \
-graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \
-graph/load/new_model_manager/task_info/kernel_task_info.cc \
-graph/load/new_model_manager/task_info/label_set_task_info.cc \
-graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \
-graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \
-graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \
-graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \
-graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \
-graph/load/new_model_manager/task_info/stream_active_task_info.cc \
-graph/load/new_model_manager/task_info/stream_switch_task_info.cc \
-graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \
-graph/load/new_model_manager/task_info/end_graph_task_info.cc \
-graph/load/new_model_manager/task_info/model_exit_task_info.cc \
-graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \
-graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \
+graph/load/model_manager/model_manager.cc \
+graph/load/model_manager/data_inputer.cc \
+graph/load/model_manager/davinci_model.cc \
+graph/load/model_manager/davinci_model_parser.cc \
+graph/load/model_manager/model_utils.cc \
+graph/load/model_manager/aipp_utils.cc \
+graph/load/model_manager/tbe_handle_store.cc \
+graph/load/model_manager/cpu_queue_schedule.cc \
+graph/load/model_manager/zero_copy_task.cc \
+graph/load/model_manager/zero_copy_offset.cc \
+graph/load/model_manager/data_dumper.cc \
+graph/load/model_manager/task_info/task_info.cc \
+graph/load/model_manager/task_info/event_record_task_info.cc \
+graph/load/model_manager/task_info/event_wait_task_info.cc \
+graph/load/model_manager/task_info/fusion_start_task_info.cc \
+graph/load/model_manager/task_info/fusion_stop_task_info.cc \
+graph/load/model_manager/task_info/kernel_ex_task_info.cc \
+graph/load/model_manager/task_info/kernel_task_info.cc \
+graph/load/model_manager/task_info/label_set_task_info.cc \
+graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \
+graph/load/model_manager/task_info/label_goto_ex_task_info.cc \
+graph/load/model_manager/task_info/memcpy_async_task_info.cc \
+graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \
+graph/load/model_manager/task_info/profiler_trace_task_info.cc \
+graph/load/model_manager/task_info/stream_active_task_info.cc \
+graph/load/model_manager/task_info/stream_switch_task_info.cc \
+graph/load/model_manager/task_info/stream_switchn_task_info.cc \
+graph/load/model_manager/task_info/end_graph_task_info.cc \
+graph/load/model_manager/task_info/model_exit_task_info.cc \
+graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \
+graph/load/model_manager/task_info/super_kernel/super_kernel.cc \
 single_op/task/op_task.cc \
 single_op/task/build_task_utils.cc \
 single_op/task/tbe_task_builder.cc \
@@ -270,7 +270,7 @@ OME_HOST_SRC_FILES := \
 single_op/single_op_manager.cc \
 hybrid/hybrid_davinci_model_stub.cc \
 hybrid/node_executor/aicpu/aicpu_ext_info.cc \
-# graph/load/new_model_manager/task_info/hccl_task_info.cc
+# graph/load/model_manager/task_info/hccl_task_info.cc
 OME_DEVICE_SRC_FILES := $(OME_HOST_SRC_FILES)
@@ -33,7 +33,7 @@ namespace {
 uint64_t size = data_num * sizeof(TYPE); \
 ge_tensor = MakeShared<GeTensor>(out_desc, size); \
 GE_CHECK_NOTNULL(ge_tensor); \
-GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, size); \
+GELOGD("node:%s allocate output %zu success, size=%ld", op_desc->GetName().c_str(), i, size); \
 ge_tensor->MutableTensorDesc().SetDataType(out_desc.GetDataType()); \
 ge_tensor->MutableTensorDesc().SetShape(out_desc.GetShape()); \
 } else { \
@@ -72,7 +72,7 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
 num_size = max_range_size;
 }
 if (num_size < 0) {
-GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%lld.", num_size);
+GELOGE(INTERNAL_ERROR, "Get negative size, num_size=%ld.", num_size);
 return INTERNAL_ERROR;
 }
 data_num = static_cast<uint64_t>(num_size);
@@ -57,6 +57,7 @@ message TaskDef {
 LabelSetDef label_set = 37;
 LabelGotoExDef label_goto_ex = 38;
 LabelSwitchByIndexDef label_switch_by_index = 39;
+KernelDefWithHandle kernel_with_handle = 40;
 }
 message KernelDef {
@@ -74,6 +75,19 @@ message KernelDef {
 uint32 kernel_ext_info_size = 19;
 }
+message KernelDefWithHandle {
+KernelContext context = 1;
+uint64 handle = 10;
+string dev_func = 11;
+uint32 block_dim = 12;
+uint32 args_size = 13;
+bytes args = 14;
+bytes sm_desc = 15;
+string original_kernel_key = 16;
+string node_info = 17;
+}
 message KernelContext {
 uint32 kernel_type = 1;
 uint32 op_id = 2; // OP type in CCE
@@ -54,38 +54,38 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
graph/label/partitioned_call_label_maker.cc \ | graph/label/partitioned_call_label_maker.cc \ | ||||
graph/label/while_label_maker.cc \ | graph/label/while_label_maker.cc \ | ||||
graph/load/graph_loader.cc \ | graph/load/graph_loader.cc \ | ||||
graph/load/new_model_manager/cpu_queue_schedule.cc \ | |||||
graph/load/new_model_manager/data_dumper.cc \ | |||||
graph/load/new_model_manager/data_inputer.cc \ | |||||
graph/load/new_model_manager/davinci_model.cc \ | |||||
graph/load/new_model_manager/davinci_model_parser.cc \ | |||||
graph/load/new_model_manager/model_manager.cc \ | |||||
graph/load/new_model_manager/model_utils.cc \ | |||||
graph/load/new_model_manager/aipp_utils.cc \ | |||||
graph/load/new_model_manager/task_info/end_graph_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/model_exit_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/event_record_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/event_wait_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/fusion_start_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/fusion_stop_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/hccl_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/kernel_ex_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/kernel_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/label_set_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/label_goto_ex_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/memcpy_async_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/profiler_trace_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/stream_active_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/stream_switch_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/stream_switchn_task_info.cc \ | |||||
graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc \ | |||||
graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||||
graph/load/new_model_manager/task_info/task_info.cc \ | |||||
graph/load/new_model_manager/tbe_handle_store.cc \ | |||||
graph/load/new_model_manager/zero_copy_task.cc \ | |||||
graph/load/new_model_manager/zero_copy_offset.cc \ | |||||
graph/load/model_manager/cpu_queue_schedule.cc \ | |||||
graph/load/model_manager/data_dumper.cc \ | |||||
graph/load/model_manager/data_inputer.cc \ | |||||
graph/load/model_manager/davinci_model.cc \ | |||||
graph/load/model_manager/davinci_model_parser.cc \ | |||||
graph/load/model_manager/model_manager.cc \ | |||||
graph/load/model_manager/model_utils.cc \ | |||||
graph/load/model_manager/aipp_utils.cc \ | |||||
graph/load/model_manager/task_info/end_graph_task_info.cc \ | |||||
graph/load/model_manager/task_info/model_exit_task_info.cc \ | |||||
graph/load/model_manager/task_info/event_record_task_info.cc \ | |||||
graph/load/model_manager/task_info/event_wait_task_info.cc \ | |||||
graph/load/model_manager/task_info/fusion_start_task_info.cc \ | |||||
graph/load/model_manager/task_info/fusion_stop_task_info.cc \ | |||||
graph/load/model_manager/task_info/hccl_task_info.cc \ | |||||
graph/load/model_manager/task_info/kernel_ex_task_info.cc \ | |||||
graph/load/model_manager/task_info/kernel_task_info.cc \ | |||||
graph/load/model_manager/task_info/label_set_task_info.cc \ | |||||
graph/load/model_manager/task_info/label_switch_by_index_task_info.cc \ | |||||
graph/load/model_manager/task_info/label_goto_ex_task_info.cc \ | |||||
graph/load/model_manager/task_info/memcpy_addr_async_task_info.cc \ | |||||
graph/load/model_manager/task_info/memcpy_async_task_info.cc \ | |||||
graph/load/model_manager/task_info/profiler_trace_task_info.cc \ | |||||
graph/load/model_manager/task_info/stream_active_task_info.cc \ | |||||
graph/load/model_manager/task_info/stream_switch_task_info.cc \ | |||||
graph/load/model_manager/task_info/stream_switchn_task_info.cc \ | |||||
graph/load/model_manager/task_info/super_kernel/super_kernel.cc \ | |||||
graph/load/model_manager/task_info/super_kernel/super_kernel_factory.cc \ | |||||
graph/load/model_manager/task_info/task_info.cc \ | |||||
graph/load/model_manager/tbe_handle_store.cc \ | |||||
graph/load/model_manager/zero_copy_task.cc \ | |||||
graph/load/model_manager/zero_copy_offset.cc \ | |||||
graph/manager/graph_context.cc \ | graph/manager/graph_context.cc \ | ||||
graph/manager/graph_manager.cc \ | graph/manager/graph_manager.cc \ | ||||
graph/manager/graph_manager_utils.cc \ | graph/manager/graph_manager_utils.cc \ | ||||
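The hunk above only swaps the directory prefix graph/load/new_model_manager for graph/load/model_manager; the file list itself is unchanged. Any remaining include of a header under the old prefix would need the same mechanical update, for example (header path illustrative):

    // before the rename:
    #include "graph/load/new_model_manager/davinci_model.h"
    // after the rename:
    #include "graph/load/model_manager/davinci_model.h"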
@@ -300,6 +300,7 @@ LIBGE_LOCAL_SRC_FILES := \ | |||||
hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | hybrid/node_executor/host_cpu/kernel/variable_kernel.cc \ | ||||
hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | hybrid/node_executor/host_cpu/kernel/assign_kernel.cc \ | ||||
hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc \ | ||||
hybrid/node_executor/host_cpu/kernel/data_kernel.cc \ | |||||
hybrid/node_executor/controlop/control_op_executor.cc \ | hybrid/node_executor/controlop/control_op_executor.cc \ | ||||
hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \ | ||||
hybrid/node_executor/hccl/hccl_node_executor.cc \ | hybrid/node_executor/hccl/hccl_node_executor.cc \ | ||||
@@ -187,8 +187,7 @@ Status GraphBuilder::UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||||
GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||||
Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_root_model_ptr, uint64_t session_id) { | |||||
if (comp_graph == nullptr) { | if (comp_graph == nullptr) { | ||||
GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | GELOGE(GE_GRAPH_PARAM_NULLPTR, "Graph build comp_graph is null."); | ||||
return GE_GRAPH_PARAM_NULLPTR; | return GE_GRAPH_PARAM_NULLPTR; | ||||
@@ -203,18 +202,18 @@ Status GraphBuilder::Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfo | |||||
(void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | (void)AttrUtils::GetBool(comp_graph, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, is_dynamic_shape); | ||||
if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) { | if (is_dynamic_shape || comp_graph->GetGraphUnknownFlag()) { | ||||
GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
BuildForDynamicShapeGraph(comp_graph, subgraph_ptr_list, ge_root_model_ptr, ge_model_ptr, session_id), | |||||
BuildForDynamicShapeGraph(comp_graph, ge_root_model_ptr, ge_model_ptr, session_id), | |||||
"Build for dynamic shape graph failed."); | "Build for dynamic shape graph failed."); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(comp_graph, ge_model_ptr, session_id), | |||||
"Build for known shape graph failed."); | "Build for known shape graph failed."); | ||||
ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); | ge_root_model_ptr->SetSubgraphInstanceNameToModel(comp_graph->GetName(), ge_model_ptr); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||||
Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||||
GeModelPtr &ge_model_ptr, uint64_t session_id) { | GeModelPtr &ge_model_ptr, uint64_t session_id) { | ||||
if (ge::GetContext().GetHostExecFlag()) { | if (ge::GetContext().GetHostExecFlag()) { | ||||
GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | GE_CHK_STATUS_RET(BuildForHostCpuGraph(comp_graph, ge_model_ptr, session_id), "Build for host-cpu graph failed."); | ||||
@@ -222,7 +221,7 @@ Status GraphBuilder::BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::v | |||||
} | } | ||||
GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | GELOGI("Begin to build known shape graph[%s].", comp_graph->GetName().c_str()); | ||||
Status ret = SecondPartition(comp_graph, subgraph_list); | |||||
Status ret = SecondPartition(comp_graph); | |||||
GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | GE_CHK_STATUS_RET(ret, "Graph[%s] second partition Failed.", comp_graph->GetName().c_str()); | ||||
auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | auto subgraph_map = graph_partitioner_.GetSubGraphMap(); | ||||
@@ -470,7 +469,6 @@ Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) { | |||||
} | } | ||||
Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | ||||
std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||||
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id) { | uint64_t session_id) { | ||||
GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | GELOGI("Start to build BuildForDynamicShape for dynamic shape."); | ||||
@@ -517,7 +515,7 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
} | } | ||||
} | } | ||||
// known shape build flow | // known shape build flow | ||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, subgraph_ptr_list, ge_model_ptr, session_id), | |||||
GE_CHK_STATUS_RET(BuildForKnownShapeGraph(sub_graph, ge_model_ptr, session_id), | |||||
"Build for known shape graph failed."); | "Build for known shape graph failed."); | ||||
} | } | ||||
ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | ge_root_model_ptr->SetSubgraphInstanceNameToModel(sub_graph->GetName(), ge_model_ptr); | ||||
@@ -719,7 +717,7 @@ Status GraphBuilder::CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc) | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list) { | |||||
Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph) { | |||||
GE_TIMESTAMP_START(GraphPartition2); | GE_TIMESTAMP_START(GraphPartition2); | ||||
auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | auto ret = graph_partitioner_.Partition(comp_graph, GraphPartitioner::kSecondPartitioning); | ||||
if (ret != SUCCESS) { | if (ret != SUCCESS) { | ||||
@@ -727,10 +725,8 @@ Status GraphBuilder::SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge: | |||||
return ret; | return ret; | ||||
} | } | ||||
GE_CHK_STATUS_RET(ret, "Graph partition Failed."); | GE_CHK_STATUS_RET(ret, "Graph partition Failed."); | ||||
auto graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||||
if (graph_2_subgraphlist.find(comp_graph) != graph_2_subgraphlist.end()) { | |||||
subgraph_ptr_list = graph_2_subgraphlist[comp_graph]; | |||||
} else { | |||||
const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap(); | |||||
if (graph_2_subgraphlist.find(comp_graph) == graph_2_subgraphlist.end()) { | |||||
GELOGE(FAILED, "Find subgraph failed."); | GELOGE(FAILED, "Find subgraph failed."); | ||||
return FAILED; | return FAILED; | ||||
} | } | ||||
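With the subgraph_ptr_list out-parameter dropped from Build(), BuildForKnownShapeGraph() and SecondPartition(), the partition result is no longer copied out through the call chain; SecondPartition() now only checks that the partitioner produced an entry for comp_graph, and consumers read graph_partitioner_.GetSubGraphMap() directly, as BuildForKnownShapeGraph() already does. A sketch of how a caller that still wanted the list could fetch it, reusing the names from the removed lines (not part of this patch):

    const auto &graph_2_subgraphlist = graph_partitioner_.GetSubGraphMap();
    std::vector<ge::SubGraphInfoPtr> subgraph_ptr_list;
    auto iter = graph_2_subgraphlist.find(comp_graph);
    if (iter != graph_2_subgraphlist.end()) {
      subgraph_ptr_list = iter->second;   // same data the old out-parameter carried
    }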
@@ -745,7 +741,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { | |||||
if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { | if (!AttrUtils::GetInt(op_desc, ATTR_INPUT_MEMORY_TYPE, mem_type)) { | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
GELOGD("[%s] has attr input_memory_type %ld", op_desc->GetName().c_str(), mem_type); | |||||
GELOGD("[%s] has attr input_memory_type %u", op_desc->GetName().c_str(), mem_type); | |||||
for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | for (const auto &in_data_anchor : node->GetAllInDataAnchors()) { | ||||
const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | const auto &peer_out_anchor = in_data_anchor->GetPeerOutAnchor(); | ||||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue); | ||||
@@ -755,7 +751,7 @@ Status GraphBuilder::AddOutputMemTypeForNode(const NodePtr &node) { | |||||
while (true) { | while (true) { | ||||
const auto &src_desc = src_node->GetOpDesc(); | const auto &src_desc = src_node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(src_desc == nullptr, continue); | GE_IF_BOOL_EXEC(src_desc == nullptr, continue); | ||||
GELOGD("[%s:%u] set attr output_memory_type %ld", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), | |||||
GELOGD("[%s:%u] set attr output_memory_type %d", src_desc->GetName().c_str(), src_out_anchor->GetIdx(), | |||||
mem_type); | mem_type); | ||||
if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, | if (!AttrUtils::SetInt(src_desc->MutableOutputDesc(src_out_anchor->GetIdx()), ATTR_OUTPUT_MEMORY_TYPE, | ||||
mem_type)) { | mem_type)) { | ||||
@@ -47,8 +47,7 @@ class GraphBuilder { | |||||
GraphBuilder(const GraphBuilder &in) = delete; | GraphBuilder(const GraphBuilder &in) = delete; | ||||
GraphBuilder &operator=(const GraphBuilder &in) = delete; | GraphBuilder &operator=(const GraphBuilder &in) = delete; | ||||
virtual ~GraphBuilder() = default; | virtual ~GraphBuilder() = default; | ||||
Status Build(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||||
GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||||
Status Build(ComputeGraphPtr &comp_graph, GeRootModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | |||||
void SetOptions(const GraphManagerOptions &options); | void SetOptions(const GraphManagerOptions &options); | ||||
private: | private: | ||||
@@ -59,12 +58,12 @@ class GraphBuilder { | |||||
Status UpdateDataInputSize(const ge::NodePtr &node_ptr); | Status UpdateDataInputSize(const ge::NodePtr &node_ptr); | ||||
Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | Status UpdateParentNodeOutputSize(const ge::ComputeGraphPtr &graph, ge::NodePtr &parent_node_ptr); | ||||
Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | Status CalcDynShapeRootGraphDataSize(const ge::OpDescPtr &op_desc); | ||||
Status SecondPartition(ge::ComputeGraphPtr &comp_graph, vector<ge::SubGraphInfoPtr> &subgraph_ptr_list); | |||||
Status SecondPartition(ge::ComputeGraphPtr &comp_graph); | |||||
Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); | Status MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph); | ||||
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_ptr_list, | |||||
Status BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph, | |||||
GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, std::vector<SubGraphInfoPtr> &subgraph_list, | |||||
Status BuildForKnownShapeGraph(ComputeGraphPtr &comp_graph, | |||||
GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | GeModelPtr &ge_model_ptr, uint64_t session_id = INVALID_SESSION_ID); | ||||
Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | Status BuildForUnknownShapeGraph(ComputeGraphPtr &comp_graph, GeModelPtr &ge_model_ptr, | ||||
uint64_t session_id = INVALID_SESSION_ID); | uint64_t session_id = INVALID_SESSION_ID); | ||||
@@ -69,8 +69,8 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) { | |||||
GELOGW("Vector all_memory_size is empty!"); | GELOGW("Vector all_memory_size is empty!"); | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) { | |||||
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front()); | |||||
if ((all_memory_size.front() == 0) || (log(kLogBase) == 0)) { | |||||
GELOGE(FAILED, "dividend is 0!"); | |||||
return FAILED; | return FAILED; | ||||
} | } | ||||
// Memory size is 512 aligned, so it is not necessary to take less than 512 | // Memory size is 512 aligned, so it is not necessary to take less than 512 | ||||
@@ -24,6 +24,7 @@ | |||||
#include "graph/buffer.h" | #include "graph/buffer.h" | ||||
#include "graph/ge_attr_value.h" | #include "graph/ge_attr_value.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/types.h" | |||||
#include "graph/node.h" | #include "graph/node.h" | ||||
#include "graph/utils/graph_utils.h" | #include "graph/utils/graph_utils.h" | ||||
#include "graph/utils/node_utils.h" | #include "graph/utils/node_utils.h" | ||||
@@ -65,7 +66,10 @@ void AlignMemOffset(size_t &mem_align_size) { | |||||
} | } | ||||
static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | static bool CompareLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | ||||
if (left.GetLifeBegin() < right.GetLifeBegin()) { | |||||
auto left_node_op_desc = left.node->GetOpDesc(); | |||||
auto right_node_op_desc = right.node->GetOpDesc(); | |||||
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr) | |||||
&& (left_node_op_desc->GetId() < right_node_op_desc->GetId())) { | |||||
return true; | return true; | ||||
} | } | ||||
return false; | return false; | ||||
@@ -97,14 +101,14 @@ bool CrossLifeTime(const NodeTypeIndex &left, const NodeTypeIndex &right) { | |||||
auto left_node_op_desc = left.node->GetOpDesc(); | auto left_node_op_desc = left.node->GetOpDesc(); | ||||
auto right_node_op_desc = right.node->GetOpDesc(); | auto right_node_op_desc = right.node->GetOpDesc(); | ||||
if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { | if ((left_node_op_desc != nullptr) && (right_node_op_desc != nullptr)) { | ||||
if (left.GetLifeBegin() < right.GetLifeBegin()) { | |||||
if (left.life_time_end >= right.GetLifeBegin()) { | |||||
if (left_node_op_desc->GetId() < right_node_op_desc->GetId()) { | |||||
if (left.life_time_end >= static_cast<size_t>(right_node_op_desc->GetId())) { | |||||
return true; | return true; | ||||
} | } | ||||
} else if (left.GetLifeBegin() == right.GetLifeBegin()) { | |||||
} else if (left_node_op_desc->GetId() == right_node_op_desc->GetId()) { | |||||
return true; | return true; | ||||
} else { | } else { | ||||
if (right.life_time_end >= left.GetLifeBegin()) { | |||||
if (right.life_time_end >= static_cast<size_t>(left_node_op_desc->GetId())) { | |||||
return true; | return true; | ||||
} | } | ||||
} | } | ||||
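The rewritten CompareLifeTime and CrossLifeTime above stop going through NodeTypeIndex::GetLifeBegin() and take the life-begin directly from the producer op's topological id, with life_time_end unchanged. A small self-contained sketch of the interval test that logic implements, using plain integers in place of the GE types (illustrative only):

    #include <cstddef>

    // True when two life spans overlap; each span begins at its producer's op id.
    static bool LifeTimesCross(size_t left_begin, size_t left_end,
                               size_t right_begin, size_t right_end) {
      if (left_begin < right_begin) {
        return left_end >= right_begin;   // left starts first and is still alive
      }
      if (left_begin == right_begin) {
        return true;                      // same begin always counts as crossing
      }
      return right_end >= left_begin;     // right starts first and is still alive
    }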
@@ -322,7 +326,12 @@ void MemoryBlock::AddLifeReuseBlock(MemoryBlock *block, DependStreamLife &total_ | |||||
size_t MemoryBlock::GetLifeBegin() { | size_t MemoryBlock::GetLifeBegin() { | ||||
size_t life_time = 0; | size_t life_time = 0; | ||||
if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
life_time = node_type_index_list_.front().GetLifeBegin(); | |||||
if (node_type_index_list_.front().node != nullptr) { | |||||
auto node_op_desc = node_type_index_list_.front().node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
life_time = node_op_desc->GetId(); | |||||
} | |||||
} | |||||
} | } | ||||
return life_time; | return life_time; | ||||
} | } | ||||
@@ -409,7 +418,7 @@ void MemoryBlock::AddDependLifeBegin(DependStreamLife &total_node_depend_stream_ | |||||
depend_stream_life_[stream_id_] = GetLifeBegin(); | depend_stream_life_[stream_id_] = GetLifeBegin(); | ||||
} | } | ||||
size_t MemoryBlock::GetLifeEnd() const { | |||||
size_t MemoryBlock::GetLifeEnd() { | |||||
if (!node_type_index_list_.empty()) { | if (!node_type_index_list_.empty()) { | ||||
return node_type_index_list_.back().life_time_end; | return node_type_index_list_.back().life_time_end; | ||||
} | } | ||||
@@ -542,11 +551,31 @@ void GetMaxBatchAllMemorySize(std::map<std::string, vector<int64_t>> &batch_all_ | |||||
} | } | ||||
} | } | ||||
void BlockMemAssigner::MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node) { | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, return); | |||||
// if input size just one and from variable, no need to reassign continuous memory | |||||
bool is_input_continuous = false; | |||||
(void)ge::AttrUtils::GetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
if (is_input_continuous && (node_op_desc->GetInputsSize() == 1)) { | |||||
auto peer_out_anchor = node->GetInDataAnchor(0)->GetPeerOutAnchor(); | |||||
GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, return); | |||||
auto in_node = peer_out_anchor->GetOwnerNode(); | |||||
GE_IF_BOOL_EXEC(in_node == nullptr, return); | |||||
if (in_node->GetType() == VARIABLE || in_node->GetType() == CONSTANT) { | |||||
GELOGI("node only one input and from variable, set continuous alloced. node_name:%s", node->GetName().c_str()); | |||||
(void)ge::AttrUtils::SetBool(node_op_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | |||||
} | |||||
} | |||||
} | |||||
void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | ||||
vector<int64_t> temp; | vector<int64_t> temp; | ||||
std::map<std::string, vector<int64_t>> batch_all_memory_size; | std::map<std::string, vector<int64_t>> batch_all_memory_size; | ||||
std::map<std::string, int64_t> batch_total_size; | std::map<std::string, int64_t> batch_total_size; | ||||
for (const NodePtr &n : compute_graph_->GetAllNodes()) { | for (const NodePtr &n : compute_graph_->GetAllNodes()) { | ||||
MarkContinuousAllocedForOneInputFromVariable(n); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | GE_IF_BOOL_EXEC(node_op_desc == nullptr, continue); | ||||
@@ -563,29 +592,32 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) { | |||||
for (auto &out_anchor : n->GetAllOutDataAnchors()) { | for (auto &out_anchor : n->GetAllOutDataAnchors()) { | ||||
GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx()); | ||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
GE_IF_BOOL_EXEC(size < 0, GELOGE(FAILED, "Node:%s size:%ld is invalid, maybe it is unknown shape node.", | |||||
node_op_desc->GetName().c_str(), size); | |||||
return;); | |||||
batch_all_memory_size[batch_label].emplace_back(size); | |||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||||
batch_total_size[batch_label] = size; | |||||
} else { | |||||
batch_total_size[batch_label] += size; | |||||
} | |||||
if (!anchor_to_symbol_.empty()) { | |||||
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); | |||||
if (iter1 == anchor_to_symbol_.end()) { | |||||
continue; | |||||
bool reuse_input = false; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInput(output_desc, reuse_input) != SUCCESS, | |||||
GELOGI("Get reuse_input failed")); | |||||
if (!reuse_input) { | |||||
int64_t size = 0; | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
batch_all_memory_size[batch_label].emplace_back(size); | |||||
if (batch_total_size.find(batch_label) == batch_total_size.end()) { | |||||
batch_total_size[batch_label] = size; | |||||
} else { | |||||
batch_total_size[batch_label] += size; | |||||
} | } | ||||
const std::string &symbol = iter1->second; | |||||
auto iter2 = symbol_size_.find(symbol); | |||||
if (iter2 == symbol_size_.end()) { | |||||
symbol_size_[symbol] = size; | |||||
} else if (size > static_cast<int64_t>(iter2->second)) { | |||||
iter2->second = size; | |||||
if (!anchor_to_symbol_.empty()) { | |||||
auto iter1 = anchor_to_symbol_.find(NodeIndexIO(n, out_anchor->GetIdx(), kOut).ToString()); | |||||
if (iter1 == anchor_to_symbol_.end()) { | |||||
continue; | |||||
} | |||||
const std::string &symbol = iter1->second; | |||||
auto iter2 = symbol_size_.find(symbol); | |||||
if (iter2 == symbol_size_.end()) { | |||||
symbol_size_[symbol] = size; | |||||
} else if (size > static_cast<int64_t>(iter2->second)) { | |||||
iter2->second = size; | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -626,17 +658,35 @@ bool IsDirectOutputNode(const NodePtr &node, int idx) { | |||||
return false; | return false; | ||||
} | } | ||||
bool CanReuseBlock(size_t continuous_life_begin, const MemoryBlock &reusable_block, size_t block_size) { | |||||
void AddReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) { | |||||
string key = std::to_string(mem_block.Size()); | |||||
key += "_" + std::to_string(mem_block.stream_id_); | |||||
key += "_" + std::to_string(mem_block.memory_type_); | |||||
auto it = reusable_block_counts.find(key); | |||||
if (it != reusable_block_counts.end()) { | |||||
it->second++; | |||||
} else { | |||||
reusable_block_counts[key] = 1; | |||||
} | |||||
} | |||||
void ReduceReusableBlockCount(const MemoryBlock &mem_block, map<string, uint64_t> &reusable_block_counts) { | |||||
string key = std::to_string(mem_block.Size()); | |||||
key += "_" + std::to_string(mem_block.stream_id_); | |||||
key += "_" + std::to_string(mem_block.memory_type_); | |||||
auto it = reusable_block_counts.find(key); | |||||
if (it != reusable_block_counts.end()) { | |||||
if (it->second > 0) { | |||||
it->second--; | |||||
} | |||||
} | |||||
} | |||||
bool CanReuseBySize(const map<string, uint64_t> &reusable_block_counts, const MemoryBlock &reusable_block, | |||||
size_t block_size, size_t real_size, bool continuous) { | |||||
bool can_reuse = false; | bool can_reuse = false; | ||||
if (reusable_block.Size() == block_size) { | if (reusable_block.Size() == block_size) { | ||||
// in some continuous input case, continuous first input node's is not same as topo first node. | |||||
if (continuous_life_begin > 0) { | |||||
if (continuous_life_begin > reusable_block.GetLifeEnd()) { | |||||
can_reuse = true; | |||||
} | |||||
} else { | |||||
can_reuse = true; | |||||
} | |||||
can_reuse = true; | |||||
} | } | ||||
return can_reuse; | return can_reuse; | ||||
} | } | ||||
@@ -647,13 +697,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | if (n == nullptr || n->GetAllOutDataAnchors().size() <= 0) { | ||||
return false; | return false; | ||||
} | } | ||||
auto node_desc = n->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(node_desc == nullptr, GELOGE(FAILED, "Node[%s] nodedesc is null.", n->GetName().c_str()); | |||||
return false;); | |||||
std::vector<int64_t> offsets_for_fusion = {}; | |||||
bool has_lx_fusion_attr = | |||||
AttrUtils::GetListInt(node_desc, ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, offsets_for_fusion); | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | ||||
auto out_anchor = n->GetOutDataAnchor(out_index); | auto out_anchor = n->GetOutDataAnchor(out_index); | ||||
GE_IF_BOOL_EXEC(out_anchor == nullptr, | GE_IF_BOOL_EXEC(out_anchor == nullptr, | ||||
@@ -676,17 +719,16 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
return false;); | return false;); | ||||
// If GetBool fail, is_input_continuous is false. | // If GetBool fail, is_input_continuous is false. | ||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); | |||||
if (is_input_continuous) { | |||||
bool is_input_continuous_no_padding = false; | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, | |||||
is_input_continuous_no_padding); | |||||
if (is_input_continuous_no_padding) { | |||||
reset_zero_copy_flag = true; | reset_zero_copy_flag = true; | ||||
has_lx_fusion_attr = true; | |||||
} else { | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
return false; | |||||
} | } | ||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
// lx_fusion memory only assign first input, broadcast's input some are variable some are not, reassign later | |||||
GE_IF_BOOL_EXEC(is_input_continuous && | |||||
(CheckIsZeroMemNodeType(peer_node->GetType()) || (has_lx_fusion_attr && (peer_in_anchor->GetIdx() != 0))), | |||||
GE_IF_BOOL_EXEC(is_input_continuous && CheckIsZeroMemNodeType(peer_node->GetType()), | |||||
GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | GELOGI("Node[%s] output[%u] no_need_assign_memory.", n->GetName().c_str(), out_index); | ||||
no_need_assign_memory = true; | no_need_assign_memory = true; | ||||
return false;); | return false;); | ||||
@@ -700,10 +742,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
// Only set attr one times. | // Only set attr one times. | ||||
if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { | if (node_continuous_input_blocks_[peer_in_node_desc->GetName()].size() == 0) { | ||||
(void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | (void)ge::AttrUtils::SetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); | ||||
// lx fusion case assign max size for first block, so reuse as none continuous | |||||
GE_IF_BOOL_EXEC(has_lx_fusion_attr, | |||||
is_op_reuse_mem_ = IsContinuousMemoryReuse(n, peer_node, out_index); | |||||
return false;); | |||||
node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); | node_continuous_input_counts_[peer_in_node_desc->GetName()] = peer_node->GetAllInDataAnchorsSize(); | ||||
} | } | ||||
peer_input_index = peer_in_anchor->GetIdx(); | peer_input_index = peer_in_anchor->GetIdx(); | ||||
@@ -716,95 +754,6 @@ bool BlockMemAssigner::IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t ou | |||||
return false; | return false; | ||||
} | } | ||||
bool IsContinuousInputNodeMaxLife(const NodePtr &n, uint32_t out_index) { | |||||
if (n == nullptr) { | |||||
return false; | |||||
} | |||||
int64_t max_node_life_time = 0; | |||||
int64_t continuous_input_node_life_time = 0; | |||||
if (static_cast<size_t>(out_index) < n->GetAllOutDataAnchors().size()) { | |||||
auto out_anchor = n->GetOutDataAnchor(out_index); | |||||
if(out_anchor == nullptr) { | |||||
return false; | |||||
} | |||||
// continuous input node's life time should be max | |||||
for (auto const &peer_in_anchor : out_anchor->GetPeerInDataAnchors()) { | |||||
if ((peer_in_anchor == nullptr) || (peer_in_anchor->GetOwnerNode() == nullptr)){ | |||||
return false; | |||||
} | |||||
auto peer_in_node_desc = peer_in_anchor->GetOwnerNode()->GetOpDesc(); | |||||
GE_IF_BOOL_EXEC(peer_in_node_desc == nullptr, | |||||
GELOGE(FAILED, "Node[%s] output[%u] peer in node desc is null.", n->GetName().c_str(), out_index); | |||||
return false;); | |||||
if(peer_in_node_desc->GetId() > max_node_life_time) { | |||||
max_node_life_time = peer_in_node_desc->GetId(); | |||||
} | |||||
// If GetBool fail, is_input_continuous is false. | |||||
bool is_input_continuous = false; | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, is_input_continuous); | |||||
if (!is_input_continuous) { | |||||
(void)ge::AttrUtils::GetBool(peer_in_node_desc, ATTR_NAME_CONTINUOUS_INPUT, is_input_continuous); | |||||
} | |||||
if (is_input_continuous) { | |||||
continuous_input_node_life_time = peer_in_node_desc->GetId(); | |||||
} | |||||
} | |||||
} | |||||
return ((max_node_life_time != 0) && (continuous_input_node_life_time == max_node_life_time)) ; | |||||
} | |||||
/// | |||||
/// @ingroup GE | |||||
/// @brief Check continuous memory reuseable | |||||
/// @return void | |||||
/// | |||||
bool BlockMemAssigner::IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index) { | |||||
// n,peer_node_desc have been checked | |||||
auto node_desc = n->GetOpDesc(); | |||||
auto peer_node_desc = peer_node->GetOpDesc(); | |||||
continuous_life_begin_ = static_cast<size_t>(node_desc->GetId()); | |||||
// lx fusion case check all continuous input node, first input node's life time should be min | |||||
for (const auto &in_anchor : peer_node->GetAllInDataAnchors()) { | |||||
if ((in_anchor == nullptr) || (in_anchor->GetPeerOutAnchor() == nullptr) || | |||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode() == nullptr) || | |||||
(in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc() == nullptr)) { | |||||
GELOGE(FAILED, "Node[%s] output[%u] peer input node desc is null.", n->GetName().c_str(), out_index); | |||||
return false; | |||||
} | |||||
auto peer_out_node_desc = in_anchor->GetPeerOutAnchor()->GetOwnerNode()->GetOpDesc(); | |||||
/// | |||||
/// node2 node1 node3 | |||||
/// | / / | | |||||
/// node5 node6 | |||||
/// first input node's life time is not min | |||||
/// when node5's first input node2's life time is not min(node2 > node1), use node1's life time to reuse | |||||
/// | |||||
if (static_cast<size_t>(peer_out_node_desc->GetId()) < continuous_life_begin_) { | |||||
continuous_life_begin_ = static_cast<size_t>(peer_out_node_desc->GetId()); | |||||
GELOGI( | |||||
"Node[%s] life[%ld] output[%u] is not continuous input node[%s] life[%ld]'s min life time," | |||||
"min is node[%s] life[%zu]", | |||||
n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), | |||||
peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), continuous_life_begin_); | |||||
} | |||||
// when node3's output node5's life time is not max(node6 > node5), not reuse | |||||
if (!IsContinuousInputNodeMaxLife(in_anchor->GetPeerOutAnchor()->GetOwnerNode(), | |||||
in_anchor->GetPeerOutAnchor()->GetIdx())) { | |||||
GELOGI( | |||||
"Node[%s] life[%ld] output[%u]'s continuous input node[%s] life[%ld]'s is not node[%s] output[%d]'s " | |||||
"max life node", | |||||
n->GetName().c_str(), node_desc->GetId(), out_index, peer_node_desc->GetName().c_str(), | |||||
peer_node_desc->GetId(), peer_out_node_desc->GetName().c_str(), in_anchor->GetPeerOutAnchor()->GetIdx()); | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
/// @brief Check pre_reuse flag & post_reuse flag for each symbol | /// @brief Check pre_reuse flag & post_reuse flag for each symbol | ||||
@@ -1090,9 +1039,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | GE_IF_BOOL_EXEC(reusable_block->batch_label_ != batch_label, continue); | ||||
// A node can reuse blocks of the same stream and preorder streams | // A node can reuse blocks of the same stream and preorder streams | ||||
if (CanReuseBlock(continuous_life_begin_, *reusable_block, block_size)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, | |||||
real_size, no_align_size); | |||||
if (CanReuseBySize(reusable_block_counts_, *reusable_block, block_size, real_size, continuous)) { | |||||
reusable_block->AddNodeTypeIndex({n, mem_type, out_index, false}, real_size, no_align_size); | |||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | auto iter = anchor_to_symbol_.find(NodeIndexIO(n, out_index, kOut).ToString()); | ||||
if (iter != anchor_to_symbol_.end()) { | if (iter != anchor_to_symbol_.end()) { | ||||
@@ -1101,6 +1049,7 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
} | } | ||||
reusable_block->continuous_block_ = continuous; | reusable_block->continuous_block_ = continuous; | ||||
reusable_block->ref_count_++; | reusable_block->ref_count_++; | ||||
ReduceReusableBlockCount(*reusable_block, reusable_block_counts_); | |||||
reusable_blocks_[memory_type][stream_id].erase((++it).base()); | reusable_blocks_[memory_type][stream_id].erase((++it).base()); | ||||
return reusable_block; | return reusable_block; | ||||
} | } | ||||
@@ -1113,7 +1062,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
// Data and netoutput need zero copy block | // Data and netoutput need zero copy block | ||||
block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | block->is_zero_copy_ = IsZeroCopyBlock(n, continuous); | ||||
block->AddNodeTypeIndex({n, mem_type, out_index, false, continuous_life_begin_}, real_size, no_align_size); | |||||
block->Init(real_size, mem_type, n, out_index, no_align_size, node_op_desc->GetStreamId()); | |||||
block->stream_id_ = node_op_desc->GetStreamId(); | block->stream_id_ = node_op_desc->GetStreamId(); | ||||
block->ref_count_++; | block->ref_count_++; | ||||
block->continuous_block_ = continuous; | block->continuous_block_ = continuous; | ||||
@@ -1131,18 +1081,73 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size, | |||||
return block; | return block; | ||||
} | } | ||||
MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||||
const bool is_op_reuse_mem) { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return nullptr, "input node is null."); | |||||
bool IsOutputIndexRef(const OpDescPtr &op_desc, uint32_t index) { | |||||
auto output_tensor = op_desc->GetOutputDescPtr(index); | |||||
bool dst_reuse_input = false; | |||||
(void)ge::TensorUtils::GetReuseInput(*output_tensor, dst_reuse_input); | |||||
if (dst_reuse_input) { | |||||
return true; | |||||
} | |||||
bool is_ref = false; | |||||
(void)ge::AttrUtils::GetBool(op_desc, ATTR_NAME_REFERENCE, is_ref); | |||||
if (is_ref) { | |||||
string output_name = op_desc->GetOutputNameByIndex(index); | |||||
for (const auto &input_name : op_desc->GetAllInputNames()) { | |||||
if (output_name == input_name) { | |||||
return true; | |||||
} | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
void BlockMemAssigner::ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, | |||||
const NodePtr &n) { | |||||
const auto node_op_desc = n->GetOpDesc(); | |||||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | |||||
if (!IsOutputIndexRef(node_op_desc, index)) { | |||||
isAllOutputRef = false; | |||||
break; | |||||
} else { | |||||
zero_memory_list_.emplace_back(n, kOutput, index); | |||||
isOutputHasRef = true; | |||||
} | |||||
} | |||||
} | |||||
Status BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, | |||||
const bool is_op_reuse_mem) { | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(n == nullptr, return INTERNAL_ERROR, "input node is null."); | |||||
auto node_op_desc = n->GetOpDesc(); | auto node_op_desc = n->GetOpDesc(); | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return nullptr, "node_op_desc is null."); | |||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(node_op_desc == nullptr, return INTERNAL_ERROR, "node_op_desc is null."); | |||||
// continuous output support ref only when all output ref input | |||||
bool isAllOutputRef = true; | |||||
bool isOutputHasRef = false; | |||||
ContinuousOutRefCheck(isAllOutputRef, isOutputHasRef, n); | |||||
if (isAllOutputRef) { | |||||
GELOGI("continuous output node ref all input, skip continuous alloc, node_name:%s", n->GetName().c_str()); | |||||
return SUCCESS; | |||||
} | |||||
if (!isAllOutputRef && isOutputHasRef) { | |||||
GELOGE(INTERNAL_ERROR, "continuous output node ref part input, not support this situation, node_name:%s", | |||||
n->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | |||||
MemoryBlock *block = nullptr; | MemoryBlock *block = nullptr; | ||||
int64_t total_size = 0; | int64_t total_size = 0; | ||||
int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | for (uint32_t index = 0; index < static_cast<uint32_t>(node_op_desc->GetOutputsSize()); index++) { | ||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
if (output_op_desc == nullptr) { | if (output_op_desc == nullptr) { | ||||
return nullptr; | |||||
GELOGE(INTERNAL_ERROR, "Get output desc failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
if (CheckIsZeroMemNodeType(n->GetType())) { | if (CheckIsZeroMemNodeType(n->GetType())) { | ||||
@@ -1152,8 +1157,8 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||||
int64_t size = 0; | int64_t size = 0; | ||||
if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | if (ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS) { | ||||
GELOGI("Get size failed"); | |||||
return nullptr; | |||||
GELOGE(INTERNAL_ERROR, "Get size failed, node_name:%s, output_index:%u", n->GetName().c_str(), index); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
size_t align_size = static_cast<size_t>(size); | size_t align_size = static_cast<size_t>(size); | ||||
AlignMemOffset(align_size); | AlignMemOffset(align_size); | ||||
@@ -1176,7 +1181,7 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||||
} | } | ||||
if (total_size == 0) { | if (total_size == 0) { | ||||
return nullptr; | |||||
return SUCCESS; | |||||
} | } | ||||
auto block_size = GetBlockSize(total_size, ranges); | auto block_size = GetBlockSize(total_size, ranges); | ||||
@@ -1190,8 +1195,11 @@ MemoryBlock *BlockMemAssigner::ApplyContinuousMemory(const NodePtr &n, const vec | |||||
// hccl task need align header and tail | // hccl task need align header and tail | ||||
block->first_continuous_block_ = true; | block->first_continuous_block_ = true; | ||||
block->last_continuous_block_ = true; | block->last_continuous_block_ = true; | ||||
} else { | |||||
GELOGE(INTERNAL_ERROR, "node apply continuous output memory failed. node_name:%s", n->GetName().c_str()); | |||||
return INTERNAL_ERROR; | |||||
} | } | ||||
return block; | |||||
return SUCCESS; | |||||
} | } | ||||
MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, const vector<int64_t> &ranges, | ||||
@@ -1203,9 +1211,8 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
NodeIndexIO node_index_io(n, index, kOut); | NodeIndexIO node_index_io(n, index, kOut); | ||||
int64_t size = 0; | int64_t size = 0; | ||||
auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | auto output_op_desc = node_op_desc->GetOutputDescPtr(index); | ||||
if (output_op_desc != nullptr) { | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
} | |||||
GE_IF_BOOL_EXEC(output_op_desc == nullptr, return nullptr); | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | |||||
size_t no_align_size = 0; | size_t no_align_size = 0; | ||||
GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(GetNoAlignSize(*node_op_desc, index, no_align_size) != SUCCESS, | ||||
return nullptr, "Get no align size failed"); | return nullptr, "Get no align size failed"); | ||||
@@ -1213,24 +1220,16 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
std::string symbol; | std::string symbol; | ||||
if (IsSymbolExist(node_index_io, symbol)) { | if (IsSymbolExist(node_index_io, symbol)) { | ||||
block = symbol_blocks_[symbol]; | block = symbol_blocks_[symbol]; | ||||
GE_IF_BOOL_EXEC(block == nullptr, GELOGE(FAILED, "Node %s ref block is nullptr.", node_op_desc->GetName().c_str()); | |||||
return nullptr); | |||||
// reduce old size | |||||
size_t align_size = block->Size(); | |||||
AlignMemOffset(align_size); | |||||
theory_memory_size_ -= align_size; | |||||
auto block_size = GetBlockSize(size, ranges); | |||||
block->SetSize(block_size); | |||||
block->SetLifeTimeEnd(life_time_); | |||||
block->AddNodeTypeIndex({n, kOutput, index, true, continuous_life_begin_}, size, no_align_size); | |||||
block->AddNodeTypeIndex({n, kOutput, index, true}, size, no_align_size); | |||||
block->ref_count_++; | block->ref_count_++; | ||||
// add new size | |||||
align_size = block_size; | |||||
AlignMemOffset(align_size); | |||||
theory_memory_size_ += align_size; | |||||
} else { | } else { | ||||
// if ref input is variable, can not find symbol, must judge alone | |||||
if (IsOutputIndexRef(node_op_desc, index)) { | |||||
zero_memory_list_.emplace_back(n, kOutput, index, false); | |||||
GELOGI("ref mode skip out block assign. node_name: %s, index:%d", n->GetName().c_str(), index); | |||||
return nullptr; | |||||
} | |||||
int64_t max_size = size; | int64_t max_size = size; | ||||
int64_t memory_type = RT_MEMORY_HBM; | int64_t memory_type = RT_MEMORY_HBM; | ||||
auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); | auto iter1 = anchor_to_symbol_.find(node_index_io.ToString()); | ||||
@@ -1282,6 +1281,7 @@ MemoryBlock *BlockMemAssigner::ApplyOutMemory(const NodePtr &n, uint32_t index, | |||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | GE_IF_BOOL_EXEC(ge::TensorUtils::GetReuseInputIndex(*owner_node_op_desc, dst_reuse_input_index) != SUCCESS, | ||||
GELOGI("Get dst_reuse_input_index failed")); | GELOGI("Get dst_reuse_input_index failed")); | ||||
if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | if (dst_reuse_input && (dst_reuse_input_index == static_cast<uint32_t>(in_anchor->GetIdx()))) { | ||||
block->AddNodeTypeIndex({owner_node, kOutput, i, true}, block->Size(), block->Size()); | |||||
out_count_reuse_input += 1; | out_count_reuse_input += 1; | ||||
reuse_input = true; | reuse_input = true; | ||||
} | } | ||||
@@ -1322,7 +1322,7 @@ bool IsAtomicOutputMemory(const ge::NodePtr &node, uint32_t output_index, bool i | |||||
if (static_cast<uint32_t>(index) == output_index) { | if (static_cast<uint32_t>(index) == output_index) { | ||||
if (node->GetOwnerComputeGraph() != nullptr) { | if (node->GetOwnerComputeGraph() != nullptr) { | ||||
string graph_name = node->GetOwnerComputeGraph()->GetName(); | string graph_name = node->GetOwnerComputeGraph()->GetName(); | ||||
GELOGD("Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||||
GELOGD("[IMAS]Atomic no assign %s name[%s] output[%ld] streamid[%ld].", graph_name.c_str(), | |||||
op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | op_desc->GetName().c_str(), index, op_desc->GetStreamId()); | ||||
} | } | ||||
return true; | return true; | ||||
@@ -1360,6 +1360,7 @@ void BlockMemAssigner::ReleaseMemory(MemoryBlock *to_release, vector<MemoryBlock | |||||
if (to_release->same_stream_) { | if (to_release->same_stream_) { | ||||
to_release->SetLifeTimeEnd(life_time_); | to_release->SetLifeTimeEnd(life_time_); | ||||
reusable_memory.emplace_back(to_release); | reusable_memory.emplace_back(to_release); | ||||
AddReusableBlockCount(*to_release, reusable_block_counts_); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -1459,7 +1460,6 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
} | } | ||||
is_op_reuse_mem_ = true; | is_op_reuse_mem_ = true; | ||||
continuous_life_begin_ = 0; | |||||
if (op_reuse_env_valid_ == true) { | if (op_reuse_env_valid_ == true) { | ||||
vector<string>::iterator it_name = | vector<string>::iterator it_name = | ||||
std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); | std::find(op_no_reuse_mem_vec_.begin(), op_no_reuse_mem_vec_.end(), op_desc->GetName()); | ||||
@@ -1477,8 +1477,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | for (auto iter = stream_workspace_blocks_.begin(); iter != stream_workspace_blocks_.end(); | ||||
++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | ++iter) { ReleaseMemorys(iter->second[stream_id], reusable_blocks_[iter->first][stream_id]); }); | ||||
if (IsContinuousOutput(node)) { | if (IsContinuousOutput(node)) { | ||||
(void)ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||||
return SUCCESS; | |||||
return ApplyContinuousMemory(node, ranges, is_op_reuse_mem_); | |||||
} | } | ||||
for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | for (uint32_t i = 0; i < static_cast<uint32_t>(op_desc->GetOutputsSize()); i++) { | ||||
int64_t size = 0; | int64_t size = 0; | ||||
@@ -1486,6 +1485,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
if (output_op_desc != nullptr) { | if (output_op_desc != nullptr) { | ||||
GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_op_desc, size) != SUCCESS, GELOGI("Get size failed")); | ||||
} | } | ||||
// fusion: other type's size not means malloc HBM memory | // fusion: other type's size not means malloc HBM memory | ||||
bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | bool l1_flag = has_mem_type_attr && memorys_type[i] == RT_MEMORY_L1; | ||||
if (l1_flag) { | if (l1_flag) { | ||||
@@ -1493,6 +1493,11 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | op_desc->GetName().c_str(), op_desc->GetOutputNameByIndex(i).c_str(), memorys_type[i]); | ||||
size = 0; | size = 0; | ||||
} | } | ||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(output_op_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
GE_IF_BOOL_EXEC((ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))), size = 0;); | |||||
std::string peer_name; | std::string peer_name; | ||||
uint32_t peer_input_index = 0; | uint32_t peer_input_index = 0; | ||||
bool out_node_set_continuous_input = false; | bool out_node_set_continuous_input = false; | ||||
@@ -1511,7 +1516,7 @@ Status BlockMemAssigner::AssignOutputMemoryWithReuse(const NodePtr &node, vector | |||||
continue; | continue; | ||||
} | } | ||||
// atomic can't be reused | // atomic can't be reused | ||||
bool need_change = is_op_reuse_mem_ && is_atomic; | |||||
bool need_change = is_op_reuse_mem_ && out_node_set_continuous_input && is_atomic; | |||||
if (need_change) { | if (need_change) { | ||||
is_op_reuse_mem_ = false; | is_op_reuse_mem_ = false; | ||||
} | } | ||||
@@ -1904,12 +1909,11 @@ void SetOffsetSize(const NodeTypeIndex &node_type, const MemoryBlock *block, | |||||
} | } | ||||
op_desc->SetWorkspace(workspace_list); | op_desc->SetWorkspace(workspace_list); | ||||
} | } | ||||
GELOGI("[IMAS]Set %s name[%s] optype[%s] %s[%u] offset to [%ld] streamid[%ld] memtype[%ld] size[%zu] realsize[%zu] " | |||||
"noalignsize[%zu] life time begin[%s] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", | |||||
graph_name.c_str(), op_desc->GetName().c_str(), node_type.node->GetType().c_str(), | |||||
node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(),block->memory_type_, | |||||
block->Size(), real_size, no_align_size, node_type.GetLifeBeginDesc().c_str(), end, child_block_level, | |||||
block->reuse_mem_, block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | |||||
GELOGI("[IMAS]Set %s name[%s] %s[%u] offset to [%ld] streamid[%ld] size[%zu] realsize[%zu] noalignsize[%zu] " | |||||
"life time begin[%zu] life time end[%zu] child[%d:%d:%d:%d:%d] isref[%d] batch[%s]", graph_name.c_str(), | |||||
op_desc->GetName().c_str(), node_type.GetMemType().c_str(), node_type.index, offset, op_desc->GetStreamId(), | |||||
block->Size(), real_size, no_align_size, op_desc->GetId(), end, child_block_level, block->reuse_mem_, | |||||
block->continuous_block_, block->is_zero_copy_, block->same_stream_, node_type.ref_input, | |||||
block->batch_label_.c_str()); | block->batch_label_.c_str()); | ||||
} | } | ||||
@@ -1973,9 +1977,8 @@ Status BlockMemAssigner::Assign() { | |||||
bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | bool BlockMemAssigner::CheckIsZeroMemNodeType(const string &node_type) const { | ||||
return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | return (node_type == VARIABLE) || (node_type == CONSTANT) || (node_type == MULTISHAPE) || | ||||
(node_type == HCOMBROADCAST) || (node_type == CONSTANTOP) || | |||||
(node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || (node_type == ASSIGN) || (node_type == HVDWAIT) || | |||||
(node_type == HVDCALLBACKBROADCAST); | |||||
(node_type == CONSTANTOP) || (node_type == ASSIGNADD) || (node_type == ASSIGNSUB) || | |||||
(node_type == ASSIGN) || (node_type == HVDWAIT); | |||||
} | } | ||||
bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | bool BlockMemAssigner::GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type) { | ||||
@@ -39,15 +39,14 @@ using DependStreamLife = std::map<int64_t, std::map<int64_t, size_t>>; | |||||
enum OpMemoryType { kOutput, kWorkspace }; | enum OpMemoryType { kOutput, kWorkspace }; | ||||
struct NodeTypeIndex { | struct NodeTypeIndex { | ||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false, size_t begin = 0) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input), life_time_begin(begin) {} | |||||
NodeTypeIndex(ge::NodePtr node, OpMemoryType mem_type, uint32_t index, bool ref_input = false) | |||||
: node(std::move(node)), mem_type(mem_type), index(index), ref_input(ref_input) {} | |||||
ge::NodePtr node = nullptr; | ge::NodePtr node = nullptr; | ||||
OpMemoryType mem_type = kOutput; | OpMemoryType mem_type = kOutput; | ||||
uint32_t index = 0; | uint32_t index = 0; | ||||
bool ref_input = false; | |||||
size_t life_time_begin = 0; | |||||
size_t life_time_end = kMaxLifeTime; | size_t life_time_end = kMaxLifeTime; | ||||
bool ref_input = false; | |||||
const string GetMemType() const { | const string GetMemType() const { | ||||
if (mem_type == kOutput) { | if (mem_type == kOutput) { | ||||
return "output"; | return "output"; | ||||
@@ -56,34 +55,6 @@ struct NodeTypeIndex { | |||||
} | } | ||||
return "unknown"; | return "unknown"; | ||||
} | } | ||||
size_t GetLifeBegin() const { | |||||
if ((node == nullptr) || (node->GetOpDesc() == nullptr)) { | |||||
return 0; | |||||
} | |||||
if ((life_time_begin > 0) && (life_time_begin < static_cast<size_t>(node->GetOpDesc()->GetId()))) { | |||||
return life_time_begin; | |||||
} else { | |||||
return node->GetOpDesc()->GetId(); | |||||
} | |||||
} | |||||
std::string GetLifeBeginDesc() const { | |||||
if (node == nullptr) { | |||||
return ""; | |||||
} | |||||
auto node_op_desc = node->GetOpDesc(); | |||||
if (node_op_desc != nullptr) { | |||||
auto life_begin = GetLifeBegin(); | |||||
if (life_begin != static_cast<size_t>(node_op_desc->GetId())) { | |||||
return std::to_string(life_begin) + "-" + std::to_string(node_op_desc->GetId()); | |||||
} else { | |||||
return std::to_string(node_op_desc->GetId()); | |||||
} | |||||
} | |||||
return ""; | |||||
} | |||||
}; | }; | ||||
class MemoryBlock { | class MemoryBlock { | ||||
@@ -115,13 +86,16 @@ class MemoryBlock { | |||||
symbol_list_.clear(); | symbol_list_.clear(); | ||||
} | } | ||||
size_t Size() const { return block_size_; } | |||||
void SetSize(size_t size) { | |||||
if (size > block_size_) { | |||||
block_size_ = size; | |||||
void Init(size_t real_size, OpMemoryType type, const ge::NodePtr &node, uint32_t out_index, size_t no_align_size, | |||||
int64_t stream_id) { | |||||
real_size_list_.emplace_back(real_size); | |||||
no_align_size_list_.emplace_back(no_align_size); | |||||
node_type_index_list_.emplace_back(node, type, out_index, false); | |||||
if (stream_id != stream_id_) { | |||||
same_stream_ = false; | |||||
} | } | ||||
} | } | ||||
size_t Size() const { return block_size_; } | |||||
size_t AlignSize() const; | size_t AlignSize() const; | ||||
@@ -169,7 +143,7 @@ class MemoryBlock { | |||||
size_t GetLifeBegin(); | size_t GetLifeBegin(); | ||||
size_t GetLifeEnd() const; | |||||
size_t GetLifeEnd(); | |||||
void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | void AddDependLifeBegin(DependStreamLife &node_depend_stream_life); | ||||
@@ -432,7 +406,6 @@ class BlockMemAssigner : public MemAssigner { | |||||
bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | bool IsOutNodeSetContinuousInput(const NodePtr &n, uint32_t out_index, std::string &peer_name, | ||||
uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); | uint32_t &peer_input_index, bool &no_need_assign_memory, bool &reset_zero_copy_flag); | ||||
bool IsContinuousMemoryReuse(const NodePtr &n, const NodePtr &peer_node, uint32_t out_index); | |||||
/// | /// | ||||
/// @ingroup GE | /// @ingroup GE | ||||
/// @|+++++++++block1++++++++| |+++++++++block1++++++++| | /// @|+++++++++block1++++++++| |+++++++++block1++++++++| | ||||
@@ -448,10 +421,16 @@ class BlockMemAssigner : public MemAssigner { | |||||
bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | bool GetWorkSpaceMemoryType(const NodePtr &node, size_t index, int64_t &memory_type); | ||||
MemoryBlock *ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||||
void ContinuousOutRefCheck(bool &isAllOutputRef, bool &isOutputHasRef, const NodePtr &n); | |||||
Status ApplyContinuousMemory(const NodePtr &n, const vector<int64_t> &ranges, const bool is_op_reuse_mem); | |||||
void MarkContinuousAllocedForOneInputFromVariable(const NodePtr &node); | |||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> reusable_blocks_; | ||||
std::map<std::string, uint64_t> reusable_block_counts_; | |||||
std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | std::unordered_map<int64_t, std::unordered_map<int64_t, std::vector<MemoryBlock *>>> stream_workspace_blocks_; | ||||
std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_; | std::unordered_map<std::string, std::vector<MemoryBlock *>> node_out_blocks_; | ||||
@@ -481,7 +460,6 @@ class BlockMemAssigner : public MemAssigner { | |||||
std::string max_batch_label_; | std::string max_batch_label_; | ||||
size_t continuous_life_begin_ = 0; | |||||
/// | /// | ||||
/// @ [stream1][nodeid] | /// @ [stream1][nodeid] | ||||
/// @[nodeid] [stream2][nodeid] | /// @[nodeid] [stream2][nodeid] | ||||
@@ -119,15 +119,31 @@ class GraphMemoryAssigner { | |||||
/// | /// | ||||
ge::Status ReAssignContinuousMemory(bool is_loop_graph); | ge::Status ReAssignContinuousMemory(bool is_loop_graph); | ||||
ge::Status ReAssignReuseAndNoPaddingContinuousInputMemory(); | |||||
ge::Status ReAssignReuseAndNoPaddingContinuousOutputMemory(); | |||||
ge::Status ReAssignVirtualInputNodeMemory(NodePtr node, size_t &mem_offset_reuse); | |||||
ge::Status ReAssignVirtualOutputNodeMemory(NodePtr node, size_t &mem_offset_reuse); | |||||
ge::Status ReAssignVirtualNodesMemory(map<string, vector<NodePtr>> &mem_reuse_nodes_map, int32_t mem_reuse_model); | |||||
ge::Status GetMaxBatchLabel(const map<string, vector<NodePtr>> &mem_reuse_virtual_nodes_map, | |||||
int32_t mem_reuse_model, string &max_batch_label); | |||||
ge::Status CalculateTensorRealSizeAndOutSize(const ge::ConstGeTensorDescPtr &output_desc, int64_t dim_index, | |||||
int64_t &output_mem_size, int64_t &batch_dim_num, int64_t &out_size); | |||||
ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ge::Status ReAssignAtomicMemory(bool is_loop_graph); | ||||
ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ge::Status FilterAtomicNodesForMemoryAssign(map<string, map<NodePtr, vector<NodePtr>>> &normal_atomic_nodes_map, | ||||
map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | map<string, vector<NodePtr>> &connecting_output_atomic_nodes); | ||||
ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, | ||||
int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); | |||||
int64_t &continuous_mem_size, int64_t memory_type); | |||||
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); | |||||
ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node); | |||||
/// | /// | ||||
/// @brief check the input of node whether support atomic attr | /// @brief check the input of node whether support atomic attr | ||||
@@ -153,10 +169,10 @@ class GraphMemoryAssigner { | |||||
ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | ge::Status AssignConnectNetOutputAtomicMemory(vector<NodePtr> &connect_netoutput_nodes); | ||||
ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ge::Status SetIndependentAtomicAttr(const ge::NodePtr &node, int64_t atomic_mem_start, | ||||
const std::vector<int64_t> &mem_offset_end, int64_t memory_type); | |||||
const std::vector<int64_t> &mem_offset_end); | |||||
ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | ge::Status SetAtomicCleanAttr(const ge::NodePtr &node, const std::vector<int64_t> &atomic_mem_start, | ||||
const std::vector<int64_t> &atomic_mem_size, int64_t memory_type); | |||||
const std::vector<int64_t> &atomic_mem_size); | |||||
ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ge::Status IsIndependentAtomicClean(const ge::NodePtr &node, bool &is_independent_atomic_clean_node); | ||||
@@ -60,9 +60,14 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||||
return FAILED); | return FAILED); | ||||
ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); | ge::ConstGeTensorDescPtr tensor_desc = n->GetOpDesc()->GetOutputDescPtr(0); | ||||
GE_CHECK_NOTNULL(tensor_desc); | GE_CHECK_NOTNULL(tensor_desc); | ||||
rtMemType_t memory_type = RT_MEMORY_HBM; | |||||
uint32_t mem_type = 0; | |||||
if (AttrUtils::GetInt(n->GetOpDesc(), ATTR_OUTPUT_MEMORY_TYPE, mem_type) && (mem_type == 1)) { | |||||
memory_type = RT_MEMORY_RDMA_HBM; | |||||
} | |||||
if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { | if (!VarManager::Instance(compute_graph->GetSessionID())->IsVarExist(node_name, *tensor_desc)) { | ||||
GE_CHK_STATUS_RET( | GE_CHK_STATUS_RET( | ||||
VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, RT_MEMORY_HBM)); | |||||
VarManager::Instance(compute_graph->GetSessionID())->AssignVarMem(node_name, *tensor_desc, memory_type)); | |||||
GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, | GE_IF_BOOL_EXEC(n->GetType() == VARIABLE, | ||||
GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); | GE_CHK_STATUS_RET(AssignData2Fp32Var(n, compute_graph->GetSessionID()))); | ||||
GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | ||||
@@ -70,7 +75,6 @@ Status VarMemAssignUtil::AssignStaticMemory2Node(ge::ComputeGraphPtr &compute_gr | |||||
} | } | ||||
uint8_t *dev_ptr = nullptr; | uint8_t *dev_ptr = nullptr; | ||||
rtMemType_t memory_type = RT_MEMORY_HBM; | |||||
GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | GE_CHK_STATUS_RET(VarManager::Instance(compute_graph->GetSessionID()) | ||||
->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); | ->GetVarAddr(node_name, *tensor_desc, &dev_ptr, memory_type)); | ||||
vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset(); | vector<int64_t> output_list = n->GetOpDesc()->GetOutputOffset(); | ||||
@@ -1013,6 +1013,24 @@ bool StreamAllocator::IsActivated(int64_t stream_id) const { | |||||
return false; | return false; | ||||
} | } | ||||
// Iterator loop: | |||||
// StreamSwitch -> StreamActive | |||||
// FpBp loop: | |||||
// StreamSwitch -> AssignAdd -> StreamActive | |||||
NodePtr FindSwitchNodeBeforeLoopActiveNode(const NodePtr &active_node) { | |||||
for (auto pre_node : active_node->GetInControlNodes()) { | |||||
if (pre_node->GetType() == STREAMSWITCH) { | |||||
return pre_node; | |||||
} | |||||
for (auto pre_pre_node : pre_node->GetInControlNodes()) { | |||||
if (pre_pre_node->GetType() == STREAMSWITCH) { | |||||
return pre_pre_node; | |||||
} | |||||
} | |||||
} | |||||
return nullptr; | |||||
} | |||||
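A self-contained sketch of the two-hop lookup described by the comment above, using a hypothetical DemoNode type instead of ge::NodePtr; both loop shapes resolve to the same switch node:

#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-in for ge::Node, reduced to the two fields the lookup needs.
struct DemoNode {
  std::string type;
  std::vector<std::shared_ptr<DemoNode>> in_ctrl;  // in-control-edge predecessors
};

// Same two-hop search as FindSwitchNodeBeforeLoopActiveNode, on the stand-in type.
std::shared_ptr<DemoNode> FindSwitch(const std::shared_ptr<DemoNode> &active) {
  for (const auto &pre : active->in_ctrl) {
    if (pre->type == "StreamSwitch") {
      return pre;
    }
    for (const auto &pre_pre : pre->in_ctrl) {
      if (pre_pre->type == "StreamSwitch") {
        return pre_pre;
      }
    }
  }
  return nullptr;
}

int main() {
  auto sw = std::make_shared<DemoNode>(DemoNode{"StreamSwitch", {}});
  auto iter_active = std::make_shared<DemoNode>(DemoNode{"StreamActive", {sw}});          // Iterator loop
  auto assign_add = std::make_shared<DemoNode>(DemoNode{"AssignAdd", {sw}});
  auto fpbp_active = std::make_shared<DemoNode>(DemoNode{"StreamActive", {assign_add}});  // FpBp loop
  return (FindSwitch(iter_active) == sw && FindSwitch(fpbp_active) == sw) ? 0 : 1;
}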
Status StreamAllocator::SetActiveStreamsForLoop() { | Status StreamAllocator::SetActiveStreamsForLoop() { | ||||
vector<uint32_t> loop_active_streams; | vector<uint32_t> loop_active_streams; | ||||
for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | for (int64_t stream_id = 0; stream_id < stream_num_; stream_id++) { | ||||
@@ -1038,6 +1056,13 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
bool is_loop_active = false; | bool is_loop_active = false; | ||||
if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | if (AttrUtils::GetBool(node->GetOpDesc(), ATTR_NAME_IS_LOOP_ACTIVE, is_loop_active) && is_loop_active) { | ||||
vector<string> activated_label_list; | vector<string> activated_label_list; | ||||
NodePtr pre_switch_node = FindSwitchNodeBeforeLoopActiveNode(node); | |||||
if (pre_switch_node == nullptr) { | |||||
GELOGE(FAILED, "find switch node before loop active node %s failed", node->GetName().c_str()); | |||||
return FAILED; | |||||
} | |||||
if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | if (!AttrUtils::GetListStr(node->GetOpDesc(), ATTR_NAME_ACTIVE_LABEL_LIST, activated_label_list) || | ||||
activated_label_list.empty()) { | activated_label_list.empty()) { | ||||
GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), | GE_CHK_BOOL_EXEC(AttrUtils::SetListInt(node->GetOpDesc(), ATTR_NAME_ACTIVE_STREAM_LIST, loop_active_streams), | ||||
@@ -1053,7 +1078,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
// it may cause some stream actived by iterator next step when this stream still alive. | // it may cause some stream actived by iterator next step when this stream still alive. | ||||
// If above situation happen, active message will lose, cause process block in next iteration. | // If above situation happen, active message will lose, cause process block in next iteration. | ||||
// In order to avoid this abnormal happen, | // In order to avoid this abnormal happen, | ||||
// add event between each last node and iterator active node in target active stream | |||||
// add event between each last node and iterator switch node | |||||
GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | GELOGI("there are %zu next iterator target streams has streamswitch node.", streams_skip_iterator_event.size()); | ||||
for (auto iter : stream_id_to_last_node) { | for (auto iter : stream_id_to_last_node) { | ||||
if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | if (streams_skip_iterator_event.find(iter.first) != streams_skip_iterator_event.end()) { | ||||
@@ -1067,7 +1092,7 @@ Status StreamAllocator::SetActiveStreamsForLoop() { | |||||
continue; | continue; | ||||
} | } | ||||
AddSendEventId(iter.second, event_num_); | AddSendEventId(iter.second, event_num_); | ||||
AddRecvEventId(node, event_num_); | |||||
AddRecvEventId(pre_switch_node, event_num_); | |||||
event_num_++; | event_num_++; | ||||
} | } | ||||
@@ -466,11 +466,10 @@ Status TaskGenerator::GenerateTaskForFusionNode(FusionTaskInfo &fusion_task_info | |||||
task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | task_def_ptr->set_ops_kernel_store_ptr(reinterpret_cast<uintptr_t>(ops_kernel_info_store_ptr)); | ||||
} | } | ||||
GELOGI( | |||||
"Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" | |||||
" task finished, generate %u task(s).", | |||||
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, | |||||
task_list_size_after - task_list_size_before); | |||||
GELOGI("Fusion: Call %s to generate fusion_node:[fusion_node_name:%s(%s), id:%ld, stream_id:%ld]" | |||||
" task finished, generate %zu task(s).", | |||||
op_kernel_lib_name.c_str(), fusion_node_name.c_str(), fusion_node_type.c_str(), op_id, stream_id, | |||||
task_list_size_after - task_list_size_before); | |||||
// record nodes which have call generate task successfully | // record nodes which have call generate task successfully | ||||
fusion_nodes_seen.insert(fusion_node.get()); | fusion_nodes_seen.insert(fusion_node.get()); | ||||
@@ -527,13 +526,6 @@ Status TaskGenerator::MarkNodeAndSetIndex(ComputeGraphPtr &graph) { | |||||
return GE_GRAPH_GRAPH_NODE_NULL; | return GE_GRAPH_GRAPH_NODE_NULL; | ||||
} | } | ||||
int64_t node_index = 0; | |||||
for (auto &node : all_nodes) { | |||||
OpDescPtr op_desc = node->GetOpDesc(); | |||||
GE_CHECK_NOTNULL(op_desc); | |||||
op_desc->SetId(node_index++); | |||||
} | |||||
map<int64_t, vector<OpDescPtr>> all_stream_ops; | map<int64_t, vector<OpDescPtr>> all_stream_ops; | ||||
for (auto &node : all_nodes) { | for (auto &node : all_nodes) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
@@ -21,7 +21,7 @@ | |||||
#include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
#include "common/model_parser/base.h" | #include "common/model_parser/base.h" | ||||
#include "graph/load/new_model_manager/model_manager.h" | |||||
#include "graph/load/model_manager/model_manager.h" | |||||
#include "omm/csa_interact.h" | #include "omm/csa_interact.h" | ||||
#include "runtime/dev.h" | #include "runtime/dev.h" | ||||
#include "runtime/mem.h" | #include "runtime/mem.h" | ||||
@@ -22,8 +22,8 @@ | |||||
#include "common/helper/model_helper.h" | #include "common/helper/model_helper.h" | ||||
#include "common/util.h" | #include "common/util.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/load/new_model_manager/davinci_model_parser.h" | |||||
#include "graph/load/new_model_manager/model_manager.h" | |||||
#include "graph/load/model_manager/davinci_model_parser.h" | |||||
#include "graph/load/model_manager/model_manager.h" | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "omm/csa_interact.h" | #include "omm/csa_interact.h" | ||||
#include "runtime/dev.h" | #include "runtime/dev.h" | ||||
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/aipp_utils.h" | |||||
#include "graph/load/model_manager/aipp_utils.h" | |||||
#include <string> | #include <string> | ||||
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||||
#include "graph/load/model_manager/cpu_queue_schedule.h" | |||||
#include "common/debug/ge_log.h" | #include "common/debug/ge_log.h" | ||||
#include "common/debug/log.h" | #include "common/debug/log.h" | ||||
@@ -20,8 +20,8 @@ | |||||
#include <vector> | #include <vector> | ||||
#include "common/ge_inner_error_codes.h" | #include "common/ge_inner_error_codes.h" | ||||
#include "graph/load/new_model_manager/task_info/task_info.h" | |||||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||||
#include "graph/load/model_manager/task_info/task_info.h" | |||||
#include "graph/load/model_manager/zero_copy_offset.h" | |||||
#include "runtime/kernel.h" | #include "runtime/kernel.h" | ||||
namespace ge { | namespace ge { |
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/data_dumper.h" | |||||
#include "graph/load/model_manager/data_dumper.h" | |||||
#include <cstdlib> | #include <cstdlib> | ||||
#include <ctime> | #include <ctime> | ||||
@@ -29,7 +29,7 @@ | |||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/anchor.h" | #include "graph/anchor.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/load/new_model_manager/model_utils.h" | |||||
#include "graph/load/model_manager/model_utils.h" | |||||
#include "graph/manager/util/debug.h" | #include "graph/manager/util/debug.h" | ||||
#include "graph/utils/attr_utils.h" | #include "graph/utils/attr_utils.h" | ||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" |
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/data_inputer.h" | |||||
#include "graph/load/model_manager/data_inputer.h" | |||||
#include <securec.h> | #include <securec.h> | ||||
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/davinci_model.h" | |||||
#include "graph/load/model_manager/davinci_model.h" | |||||
#include <graph/utils/node_utils.h> | #include <graph/utils/node_utils.h> | ||||
#include <algorithm> | #include <algorithm> | ||||
@@ -36,9 +36,9 @@ | |||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/ge_context.h" | #include "graph/ge_context.h" | ||||
#include "graph/graph.h" | #include "graph/graph.h" | ||||
#include "graph/load/new_model_manager/cpu_queue_schedule.h" | |||||
#include "graph/load/new_model_manager/model_manager.h" | |||||
#include "graph/load/new_model_manager/tbe_handle_store.h" | |||||
#include "graph/load/model_manager/cpu_queue_schedule.h" | |||||
#include "graph/load/model_manager/model_manager.h" | |||||
#include "graph/load/model_manager/tbe_handle_store.h" | |||||
#include "graph/manager/graph_mem_allocator.h" | #include "graph/manager/graph_mem_allocator.h" | ||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/manager/trans_var_data_utils.h" | #include "graph/manager/trans_var_data_utils.h" | ||||
@@ -446,23 +446,20 @@ void DavinciModel::InitRuntimeParams() { | |||||
runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | runtime_param_.mem_size, runtime_param_.weight_size, runtime_param_.var_size); | ||||
} | } | ||||
void DavinciModel::CheckHasHcomOp() { | |||||
Graph graph = ge_model_->GetGraph(); | |||||
auto compute_graph = GraphUtils::GetComputeGraph(graph); | |||||
if (compute_graph == nullptr) { | |||||
return; | |||||
} | |||||
void DavinciModel::CheckHasHcomOp(const ComputeGraphPtr &compute_graph) { | |||||
const set<string> hcom_opp_types({ | |||||
HCOMBROADCAST, HCOMALLGATHER, HCOMALLREDUCE, HCOMSEND, HCOMRECEIVE, HCOMREDUCESCATTER, | |||||
HVDCALLBACKALLREDUCE, HVDCALLBACKALLGATHER, HVDCALLBACKBROADCAST, HVDWAIT, HCOMREDUCE | |||||
}); | |||||
for (const auto &node : compute_graph->GetAllNodes()) { | for (const auto &node : compute_graph->GetAllNodes()) { | ||||
OpDescPtr op_desc = node->GetOpDesc(); | OpDescPtr op_desc = node->GetOpDesc(); | ||||
GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); | GE_IF_BOOL_EXEC(op_desc == nullptr, GELOGW("Node OpDesc is nullptr"); continue); | ||||
GE_IF_BOOL_EXEC(((op_desc->GetType() == HCOMBROADCAST) || (op_desc->GetType() == HCOMALLGATHER) || | |||||
(op_desc->GetType() == HCOMALLREDUCE) || (op_desc->GetType() == HCOMSEND) || | |||||
(op_desc->GetType() == HCOMRECEIVE) || (op_desc->GetType() == HCOMREDUCESCATTER) || | |||||
(op_desc->GetType() == HVDCALLBACKALLREDUCE) || (op_desc->GetType() == HVDCALLBACKALLGATHER) || | |||||
(op_desc->GetType() == HVDCALLBACKBROADCAST) || (op_desc->GetType() == HVDWAIT) || | |||||
(op_desc->GetType() == HCOMREDUCE)), | |||||
uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||||
(void)hcom_streams_.emplace(stream_id); GELOGD("hcom stream: %u.", stream_id); continue); | |||||
if (hcom_opp_types.count(op_desc->GetType()) > 0) { | |||||
uint32_t stream_id = static_cast<uint32_t>(op_desc->GetStreamId()); | |||||
hcom_streams_.emplace(stream_id); | |||||
GELOGD("hcom stream: %u.", stream_id); | |||||
} | |||||
} | } | ||||
} | } | ||||
@@ -641,7 +638,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
name_ = ge_model_->GetName(); | name_ = ge_model_->GetName(); | ||||
(void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); | (void)ge::AttrUtils::GetBool(ge_model_, ATTR_NAME_SWITCH_FOR_L1_FUSION, is_l1_fusion_enable_); | ||||
GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); | GELOGD("The value of ge.l1Fusion in ge_model is %d.", is_l1_fusion_enable_); | ||||
CheckHasHcomOp(); | |||||
CheckHasHcomOp(compute_graph); | |||||
vector<int64_t> huge_stream_list; | vector<int64_t> huge_stream_list; | ||||
(void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); | (void)ge::AttrUtils::GetListInt(ge_model_, ATTR_MODEL_HUGE_STREAM_LIST, huge_stream_list); | ||||
@@ -722,7 +719,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size | |||||
/// the aicpu opertor needs to destroy history record, and update operator memory address. | /// the aicpu opertor needs to destroy history record, and update operator memory address. | ||||
/// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). | /// The model with specified aicpu operators is only marked here, and destruction is in ModelManager::ExecuteModel(). | ||||
need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); | need_destroy_aicpu_kernel_ = IsAicpuKernelConnectSpecifiedLayer(); | ||||
(void)ge::AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name_); | |||||
string fp_ceiling_mode; | string fp_ceiling_mode; | ||||
if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | if (ge::AttrUtils::GetStr(ge_model_, ATTR_FP_CEILING_MODE, fp_ceiling_mode)) { | ||||
@@ -1028,7 +1024,7 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
const vector<OpDescPtr> &output_op_list) { | const vector<OpDescPtr> &output_op_list) { | ||||
GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | GELOGD("Data node size: %zu, NetOutput node size: %zu", data_by_index.size(), output_op_list.size()); | ||||
for (auto &item : data_by_index) { | for (auto &item : data_by_index) { | ||||
auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||||
const auto output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, item.second); | |||||
GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | GELOGD("Data node: %s, output addr size: %zu", item.second->GetName().c_str(), output_addrs.size()); | ||||
input_addrs_list_.emplace_back(output_addrs); | input_addrs_list_.emplace_back(output_addrs); | ||||
@@ -1036,14 +1032,18 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | GE_CHK_STATUS_RET(InitAippType(item.first, item.second, data_by_index), "Init AIPP Type failed"); | ||||
GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | GE_CHK_STATUS_RET(InitOrigInputInfo(item.first, item.second), "Init Orig input failed"); | ||||
GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | GE_CHK_STATUS_RET(InitAippInputOutputDims(item.first, item.second), "Init AIPP dims failed"); | ||||
GE_CHK_STATUS_RET(InitInputDescInfo(item.second), "Init input desc info failed"); | |||||
if (item.second->GetType() == AIPP_DATA_TYPE) { | if (item.second->GetType() == AIPP_DATA_TYPE) { | ||||
GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | GELOGI("This is dynamic aipp model, Node: %s", item.second->GetName().c_str()); | ||||
is_dynamic_aipp_ = true; | is_dynamic_aipp_ = true; | ||||
} | } | ||||
} | } | ||||
vector<string> out_node_name; | |||||
(void)AttrUtils::GetListStr(ge_model_, ATTR_MODEL_OUT_NODES_NAME, out_node_name); | |||||
GELOGD("Output node size: %zu, out nodes name: %zu", output_op_list.size(), out_node_name.size()); | |||||
for (const auto &op_desc : output_op_list) { | for (const auto &op_desc : output_op_list) { | ||||
auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||||
const auto input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, op_desc); | |||||
GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | GELOGD("NetOutput node: %s, input addr size: %zu", op_desc->GetName().c_str(), input_addrs.size()); | ||||
output_addrs_list_.emplace_back(input_addrs); | output_addrs_list_.emplace_back(input_addrs); | ||||
@@ -1061,10 +1061,11 @@ Status DavinciModel::GenInputOutputInfo(const map<uint32_t, OpDescPtr> &data_by_ | |||||
if (InitOutputTensorInfo(op_desc) != SUCCESS) { | if (InitOutputTensorInfo(op_desc) != SUCCESS) { | ||||
return INTERNAL_ERROR; | return INTERNAL_ERROR; | ||||
} | } | ||||
GE_CHK_STATUS_RET(InitOutputDescInfo(op_desc, out_node_name), "Init output desc info failed"); | |||||
} | } | ||||
GE_CHK_STATUS_RET(InitInputDescInfo(data_by_index), "Init input desc info failed"); | |||||
return InitOutputDescInfo(output_op_list); | |||||
return SUCCESS; | |||||
} | } | ||||
bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | bool DavinciModel::IsGetNextSinkDynamic(const OpDescPtr &op_desc) { | ||||
@@ -1809,7 +1810,7 @@ void DavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_inp | |||||
/// | /// | ||||
Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | ||||
if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { | if (!op_desc->HasAttr(ATTR_NAME_AIPP)) { | ||||
GELOGW("There is not AIPP related with index %u.", index); | |||||
GELOGW("there is not AIPP related with index %u.", index); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -1818,7 +1819,7 @@ Status DavinciModel::InitAippInfo(uint32_t index, const OpDescPtr &op_desc) { | |||||
GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | GE_CHK_BOOL_RET_STATUS(AttrUtils::GetNamedAttrs(op_desc, ATTR_NAME_AIPP, aipp_attr), GE_AIPP_NOT_EXIST, | ||||
"Data node do not contain param aipp!"); | "Data node do not contain param aipp!"); | ||||
GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | GE_CHK_STATUS_RET(OpUtils::ConvertAippParams(aipp_attr, &aipp_params), "get aipp params failed"); | ||||
GELOGI("Node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
GELOGI("node data: %s, type: %s, current index: %u, current node related input rank: %u", | |||||
op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); | op_desc->GetName().c_str(), op_desc->GetType().c_str(), index, aipp_params.related_input_rank()); | ||||
AippConfigInfo aipp_info; | AippConfigInfo aipp_info; | ||||
@@ -1875,7 +1876,7 @@ Status DavinciModel::InitAippType(uint32_t index, const OpDescPtr &op_desc, cons | |||||
(void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | (void)AttrUtils::GetStr(op_desc, ATTR_DATA_AIPP_DATA_NAME_MAP, releated_name); | ||||
for (const auto item : data_list) { | for (const auto item : data_list) { | ||||
if (item.second->GetName() == releated_name) { | if (item.second->GetName() == releated_name) { | ||||
GELOGI("Find aipp_data [%s] index %zu from index %u", releated_name.c_str(), item.first, index); | |||||
GELOGI("Find aipp_data [%s] index %u from index %u", releated_name.c_str(), item.first, index); | |||||
aipp_index = item.first; | aipp_index = item.first; | ||||
} | } | ||||
} | } | ||||
@@ -1980,27 +1981,24 @@ void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, | |||||
} | } | ||||
} | } | ||||
Status DavinciModel::InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index) { | |||||
for (const auto &item : data_by_index) { | |||||
const auto op_desc = item.second; | |||||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
Status DavinciModel::InitInputDescInfo(const OpDescPtr &op_desc) { | |||||
GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0)); | |||||
InputOutputDescInfo input; | |||||
ShapeDescription dims_info; | |||||
Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||||
InputOutputDescInfo input; | |||||
ShapeDescription dims_info; | |||||
Format format = op_desc->GetInputDescPtr(0)->GetFormat(); | |||||
CreateInputDimsInfo(op_desc, format, input.shape_info, dims_info); | |||||
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
input.name = op_desc->GetName(); | |||||
int64_t input_size = 0; | |||||
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
input.size = input_size; | |||||
input_formats_.push_back(format); | |||||
input_descs_.push_back(input); | |||||
input.data_type = op_desc->GetInputDescPtr(0)->GetDataType(); | |||||
input.name = op_desc->GetName(); | |||||
int64_t input_size = 0; | |||||
GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed."); | |||||
input.size = input_size; | |||||
input_formats_.push_back(format); | |||||
input_descs_.push_back(input); | |||||
input.shape_info = dims_info; | |||||
input_descs_dims_.push_back(input); | |||||
} | |||||
input.shape_info = dims_info; | |||||
input_descs_dims_.push_back(input); | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2066,35 +2064,31 @@ void DavinciModel::CreateOutput(uint32_t index, const OpDescPtr &op_desc, InputO | |||||
output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | output.data_type = op_desc->GetInputDescPtr(index)->GetDataType(); | ||||
} | } | ||||
Status DavinciModel::InitOutputDescInfo(const vector<OpDescPtr> &output_op_list) { | |||||
GELOGD("Output node size: %zu", output_op_list.size()); | |||||
for (const auto &op_desc : output_op_list) { | |||||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||||
for (uint32_t index = 0; index < out_size; index++) { | |||||
string output_name; | |||||
InputOutputDescInfo output; | |||||
uint32_t format_result; | |||||
CreateOutput(index, op_desc, output, format_result); | |||||
std::vector<std::string> src_name = op_desc->GetSrcName(); | |||||
std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||||
GE_CHK_BOOL_RET_STATUS(src_name.size() > index && src_index.size() > index, INTERNAL_ERROR, | |||||
"construct output_name failed."); | |||||
// forward compatbility, if old om has no out_node_name, need to return output follow origin way | |||||
if (out_size == out_node_name_.size()) { | |||||
// neweast plan, the index will add to name during generate model. | |||||
bool contains_colon = out_node_name_[index].find(":") != std::string::npos; | |||||
output_name = | |||||
contains_colon ? out_node_name_[index] : out_node_name_[index] + ":" + std::to_string(src_index[index]); | |||||
} else { | |||||
output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + | |||||
std::to_string(src_index[index]); | |||||
} | |||||
output.name = output_name; | |||||
output_descs_.push_back(output); | |||||
output_formats_.push_back(format_result); | |||||
Status DavinciModel::InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name) { | |||||
uint32_t out_size = static_cast<uint32_t>(op_desc->GetInputsSize()); | |||||
for (uint32_t i = 0; i < out_size; ++i) { | |||||
string output_name; | |||||
InputOutputDescInfo output; | |||||
uint32_t format_result; | |||||
CreateOutput(i, op_desc, output, format_result); | |||||
std::vector<std::string> src_name = op_desc->GetSrcName(); | |||||
std::vector<int64_t> src_index = op_desc->GetSrcIndex(); | |||||
GE_CHK_BOOL_RET_STATUS(src_name.size() > i && src_index.size() > i, INTERNAL_ERROR, | |||||
"construct output_name failed."); | |||||
// forward compatibility: if an old om has no out_node_name, output names fall back to the original scheme | |||||
if (out_size == out_node_name.size()) { | |||||
// newest plan: the index is appended to the name when the model is generated. | |||||
bool contains_colon = out_node_name[i].find(":") != std::string::npos; | |||||
output_name = contains_colon ? out_node_name[i] : out_node_name[i] + ":" + std::to_string(src_index[i]); | |||||
} else { | |||||
output_name = string("output_") + std::to_string(i) + "_" + src_name[i] + "_" + std::to_string(src_index[i]); | |||||
} | } | ||||
output.name = output_name; | |||||
output_descs_.push_back(output); | |||||
output_formats_.push_back(format_result); | |||||
} | } | ||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
@@ -2147,11 +2141,6 @@ Status DavinciModel::SyncVarData() { | |||||
RT_MEMCPY_HOST_TO_DEVICE)); | RT_MEMCPY_HOST_TO_DEVICE)); | ||||
} | } | ||||
for (const auto &item : broadcast_variable_) { | |||||
ret = VarManager::Instance(session_id_)->SyncVarData(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||||
item.first.c_str()); | |||||
} | |||||
return ret; | return ret; | ||||
} | } | ||||
@@ -2481,7 +2470,7 @@ Status DavinciModel::CopyOutputData(uint32_t data_id, OutputData &output_data, r | |||||
uint64_t buffer_length = buffer.length; | uint64_t buffer_length = buffer.length; | ||||
void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | void *buffer_addr = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(buffer.data)); | ||||
GELOGI("CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||||
GELOGI("[IMAS]CopyPlainData memcpy graph_%u type[F] output[%u] memaddr[%p] mem_size[%lu] datasize[%lu]", | |||||
runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | runtime_param_.graph_id, output.first, output.second.GetBasicAddr(), data_size, buffer_length); | ||||
GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | GE_CHK_RT_RET(rtMemcpy(buffer_addr, buffer_length, output.second.GetBasicAddr(), data_size, kind)); | ||||
idx++; | idx++; | ||||
@@ -2635,12 +2624,6 @@ Status DavinciModel::ReturnResult(uint32_t data_id, const bool rslt_flg, const b | |||||
/// | /// | ||||
Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | Status DavinciModel::ReturnNoOutput(uint32_t data_id) { | ||||
GELOGI("ReturnNoOutput model id:%u", model_id_); | GELOGI("ReturnNoOutput model id:%u", model_id_); | ||||
for (const auto item : broadcast_variable_) { | |||||
Status ret = VarManager::Instance(session_id_) | |||||
->SyncBroadCastData2Var(runtime_param_.graph_id, item.first, item.second, mem_base_); | |||||
GE_CHK_BOOL_EXEC(ret == SUCCESS, break, "sync var data ret failed, model id:%u, op name:%s.", model_id_, | |||||
item.first.c_str()); | |||||
} | |||||
GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | GE_CHK_BOOL_EXEC(listener_ != nullptr, return PARAM_INVALID, "listener_ is null!"); | ||||
std::vector<ge::OutputTensorInfo> outputs; | std::vector<ge::OutputTensorInfo> outputs; | ||||
@@ -3064,6 +3047,64 @@ Status DavinciModel::MallocKnownArgs() { | |||||
return SUCCESS; | return SUCCESS; | ||||
} | } | ||||
void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
const domi::TaskDef &task_def, size_t task_index) { | |||||
bool flag = GetL1FusionEnableOption(); | |||||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
if (env_flag != 0) { | |||||
flag = true; | |||||
} | |||||
TaskDescInfo task_desc_info; | |||||
if (!om_name_.empty()) { | |||||
task_desc_info.model_name = om_name_; | |||||
} else { | |||||
task_desc_info.model_name = name_; | |||||
} | |||||
task_desc_info.op_name = op->GetName(); | |||||
task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
task_desc_info.task_id = task->GetTaskID(); | |||||
task_desc_info.stream_id = task->GetStreamId(); | |||||
task_desc_info.shape_type = "static"; | |||||
task_desc_info.cur_iter_num = 0; | |||||
// task type | |||||
task_desc_info.task_type = kTaskTypeInvalid; | |||||
auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type()); | |||||
if (model_task_type == RT_MODEL_TASK_KERNEL) { | |||||
const domi::KernelDef &kernel_def = task_def.kernel(); | |||||
const auto &context = kernel_def.context(); | |||||
auto kernel_type = static_cast<ccKernelType>(context.kernel_type()); | |||||
if (kernel_type == ccKernelType::TE) { | |||||
task_desc_info.task_type = kTaskTypeAicore; | |||||
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { | |||||
task_desc_info.task_type = kTaskTypeAicpu; | |||||
} else { | |||||
GELOGD("Other kernel type: %u", context.kernel_type()); | |||||
} | |||||
} else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) { | |||||
task_desc_info.task_type = kTaskTypeAicpu; | |||||
} else { | |||||
GELOGD("Skip task type: %d", static_cast<int>(model_task_type)); | |||||
} | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
if (flag) { | |||||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
TaskDescInfo task_desc_info; | |||||
string op_name = "super_kernel_" + to_string(task_index); | |||||
task_desc_info.op_name = op_name; | |||||
task_desc_info.task_id = task->GetSktTaskID(); | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
} | |||||
} | |||||
return; | |||||
} | |||||
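The task-type branch above reduces to a small mapping; a hedged sketch with stand-in enums (not the GE or runtime definitions):

// Illustrative stand-ins for ccKernelType and the reported task type.
enum class DemoKernelType { TE, AI_CPU, CUST_AI_CPU, OTHER };
enum class DemoTaskType { INVALID, AICORE, AICPU };

DemoTaskType ClassifyKernel(bool is_kernel_ex, DemoKernelType kernel_type) {
  if (is_kernel_ex) {
    return DemoTaskType::AICPU;                              // RT_MODEL_TASK_KERNEL_EX case
  }
  switch (kernel_type) {
    case DemoKernelType::TE:
      return DemoTaskType::AICORE;                           // TBE kernels run on AI Core
    case DemoKernelType::AI_CPU:
    case DemoKernelType::CUST_AI_CPU:
      return DemoTaskType::AICPU;
    default:
      return DemoTaskType::INVALID;                          // other kernel types are only logged
  }
}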
Status DavinciModel::DistributeTask() { | Status DavinciModel::DistributeTask() { | ||||
GELOGI("do Distribute."); | GELOGI("do Distribute."); | ||||
for (auto &task : cpu_task_list_) { | for (auto &task : cpu_task_list_) { | ||||
@@ -3075,18 +3116,11 @@ Status DavinciModel::DistributeTask() { | |||||
} | } | ||||
task_desc_info_.clear(); | task_desc_info_.clear(); | ||||
bool flag = GetL1FusionEnableOption(); | |||||
char skt_enable_env[MMPA_MAX_PATH] = { 0x00 }; | |||||
INT32 res = mmGetEnv("SKT_ENABLE", skt_enable_env, MMPA_MAX_PATH); | |||||
int64_t env_flag = (res == EN_OK) ? std::strtol(skt_enable_env, nullptr, kDecimal) : 0; | |||||
if (env_flag != 0) { | |||||
flag = true; | |||||
} | |||||
const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | const auto &model_task_def = ge_model_->GetModelTaskDefPtr(); | ||||
for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) { | ||||
auto &task_def = model_task_def->task(task_index); | auto &task_def = model_task_def->task(task_index); | ||||
auto &task = task_list_.at(task_index); | auto &task = task_list_.at(task_index); | ||||
GE_CHECK_NOTNULL(task); | |||||
GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index); | ||||
// for data dump | // for data dump | ||||
auto op_index = std::max(task_def.kernel().context().op_index(), | auto op_index = std::max(task_def.kernel().context().op_index(), | ||||
@@ -3106,33 +3140,9 @@ Status DavinciModel::DistributeTask() { | |||||
GE_IF_BOOL_EXEC(no_need_profiling, continue); | GE_IF_BOOL_EXEC(no_need_profiling, continue); | ||||
SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId()); | ||||
// Load task info for profiling | |||||
TaskDescInfo task_desc_info; | |||||
if (!om_name_.empty()) { | |||||
task_desc_info.model_name = om_name_; | |||||
} else { | |||||
task_desc_info.model_name = name_; | |||||
} | |||||
task_desc_info.op_name = op->GetName(); | |||||
task_desc_info.block_dim = task_def.kernel().block_dim(); | |||||
task_desc_info.task_id = task->GetTaskID(); | |||||
task_desc_info.stream_id = task->GetStreamId(); | |||||
task_desc_info.shape_type = "static"; | |||||
task_desc_info.cur_iter_num = 0; | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
if (flag) { | |||||
if (task->GetSktTaskID() != 0xFFFFFFFF) { | |||||
TaskDescInfo task_desc_info; | |||||
string op_name = "super_kernel_" + to_string(task_index); | |||||
task_desc_info.op_name = op_name; | |||||
task_desc_info.task_id = task->GetSktTaskID(); | |||||
profiler_report_op_info_[task_desc_info.op_name] = | |||||
std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id); | |||||
task_desc_info_.emplace_back(task_desc_info); | |||||
} | |||||
} | |||||
// save task info for profiling | |||||
SaveProfilingTaskDescInfo(op, task, task_def, task_index); | |||||
} | } | ||||
// launch dump kernel to aicpu | // launch dump kernel to aicpu | ||||
GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed."); | ||||
@@ -3993,14 +4003,18 @@ Status DavinciModel::GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_des | |||||
} else { | } else { | ||||
compute_graph_info.model_name = name_; | compute_graph_info.model_name = name_; | ||||
} | } | ||||
std::vector<Format> format = { FORMAT_NULL }; | |||||
std::vector<std::vector<int64_t>> shape = { {0} }; | |||||
std::vector<DataType> data_type = { DT_UNDEFINED }; | |||||
compute_graph_info.op_name = op_desc.op_name; | compute_graph_info.op_name = op_desc.op_name; | ||||
compute_graph_info.op_type = op_desc.op_type; | compute_graph_info.op_type = op_desc.op_type; | ||||
compute_graph_info.input_format = op_desc.input_format; | |||||
compute_graph_info.input_shape = op_desc.input_shape; | |||||
compute_graph_info.input_data_type = op_desc.input_data_type; | |||||
compute_graph_info.output_format = op_desc.output_format; | |||||
compute_graph_info.output_shape = op_desc.output_shape; | |||||
compute_graph_info.output_data_type = op_desc.output_data_type; | |||||
compute_graph_info.input_format = op_desc.input_format.empty() ? format : op_desc.input_format; | |||||
compute_graph_info.input_shape = op_desc.input_shape.empty() ? shape : op_desc.input_shape; | |||||
compute_graph_info.input_data_type = op_desc.input_data_type.empty() ? data_type : op_desc.input_data_type; | |||||
compute_graph_info.output_format = op_desc.output_format.empty() ? format : op_desc.output_format; | |||||
compute_graph_info.output_shape = op_desc.output_shape.empty() ? shape : op_desc.output_shape; | |||||
compute_graph_info.output_data_type = op_desc.output_data_type.empty() ? data_type : op_desc.output_data_type; | |||||
uint32_t task_id = 0; | uint32_t task_id = 0; | ||||
uint32_t stream_id = 0; | uint32_t stream_id = 0; | ||||
auto iter = profiler_report_op_info_.find(op_desc.op_name); | auto iter = profiler_report_op_info_.find(op_desc.op_name); |
@@ -32,12 +32,12 @@ | |||||
#include "common/types.h" | #include "common/types.h" | ||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/load/new_model_manager/aipp_utils.h" | |||||
#include "graph/load/new_model_manager/data_dumper.h" | |||||
#include "graph/load/new_model_manager/data_inputer.h" | |||||
#include "graph/load/new_model_manager/model_utils.h" | |||||
#include "graph/load/new_model_manager/zero_copy_offset.h" | |||||
#include "graph/load/new_model_manager/zero_copy_task.h" | |||||
#include "graph/load/model_manager/aipp_utils.h" | |||||
#include "graph/load/model_manager/data_dumper.h" | |||||
#include "graph/load/model_manager/data_inputer.h" | |||||
#include "graph/load/model_manager/model_utils.h" | |||||
#include "graph/load/model_manager/zero_copy_offset.h" | |||||
#include "graph/load/model_manager/zero_copy_task.h" | |||||
#include "graph/model.h" | #include "graph/model.h" | ||||
#include "graph/node.h" | #include "graph/node.h" | ||||
#include "graph/op_desc.h" | #include "graph/op_desc.h" | ||||
@@ -623,6 +623,9 @@ class DavinciModel { | |||||
Status DistributeTask(); | Status DistributeTask(); | ||||
void SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task, | |||||
const domi::TaskDef &task_def, size_t task_index); | |||||
uint8_t *MallocFeatureMapMem(size_t data_size); | uint8_t *MallocFeatureMapMem(size_t data_size); | ||||
uint8_t *MallocWeightsMem(size_t weights_size); | uint8_t *MallocWeightsMem(size_t weights_size); | ||||
@@ -824,7 +827,7 @@ class DavinciModel { | |||||
void OpDebugUnRegister(); | void OpDebugUnRegister(); | ||||
void CheckHasHcomOp(); | |||||
void CheckHasHcomOp(const ComputeGraphPtr &graph); | |||||
Status DoTaskSink(); | Status DoTaskSink(); | ||||
@@ -847,8 +850,8 @@ class DavinciModel { | |||||
Status InitOutputTensorInfo(const OpDescPtr &op_desc); | Status InitOutputTensorInfo(const OpDescPtr &op_desc); | ||||
Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | Status GenOutputTensorInfo(OutputData *output_data, vector<OutputTensorInfo> &outputs); | ||||
Status InitInputDescInfo(const map<uint32_t, OpDescPtr> &data_by_index); | |||||
Status InitOutputDescInfo(const vector<OpDescPtr> &output_op_list); | |||||
Status InitInputDescInfo(const OpDescPtr &op_desc); | |||||
Status InitOutputDescInfo(const OpDescPtr &op_desc, const vector<string> &out_node_name); | |||||
Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | Status InitOrigInputInfo(uint32_t index, const OpDescPtr &op_desc); | ||||
Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | Status InitAippInfo(uint32_t index, const OpDescPtr &op_desc); | ||||
@@ -883,7 +886,6 @@ class DavinciModel { | |||||
GeModelPtr ge_model_; // release after DavinciModel::Init | GeModelPtr ge_model_; // release after DavinciModel::Init | ||||
bool need_destroy_aicpu_kernel_{false}; | bool need_destroy_aicpu_kernel_{false}; | ||||
vector<string> out_node_name_; | |||||
map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | map<uint32_t, OpDescPtr> op_list_; // release after DavinciModel::Init | ||||
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/davinci_model_parser.h" | |||||
#include "graph/load/model_manager/davinci_model_parser.h" | |||||
namespace ge { | namespace ge { | ||||
DavinciModelParser::DavinciModelParser() {} | DavinciModelParser::DavinciModelParser() {} |
@@ -14,7 +14,7 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/model_manager.h" | |||||
#include "graph/load/model_manager/model_manager.h" | |||||
#include <string> | #include <string> | ||||
@@ -28,8 +28,8 @@ | |||||
#include "framework/common/util.h" | #include "framework/common/util.h" | ||||
#include "graph/common/ge_call_wrapper.h" | #include "graph/common/ge_call_wrapper.h" | ||||
#include "graph/debug/ge_attr_define.h" | #include "graph/debug/ge_attr_define.h" | ||||
#include "graph/load/new_model_manager/davinci_model.h" | |||||
#include "graph/load/new_model_manager/davinci_model_parser.h" | |||||
#include "graph/load/model_manager/davinci_model.h" | |||||
#include "graph/load/model_manager/davinci_model_parser.h" | |||||
#include "model/ge_root_model.h" | #include "model/ge_root_model.h" | ||||
#include "graph/common/local_context.h" | #include "graph/common/local_context.h" | ||||
#include "graph/utils/attr_utils.h" | #include "graph/utils/attr_utils.h" | ||||
@@ -527,6 +527,7 @@ Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector<InputT | |||||
DataBuffer data; | DataBuffer data; | ||||
data.data = inputs[i].data; | data.data = inputs[i].data; | ||||
data.length = inputs[i].length; | data.length = inputs[i].length; | ||||
input_data.shapes.emplace_back(inputs[i].dims); | |||||
input_data.blobs.push_back(data); | input_data.blobs.push_back(data); | ||||
} | } | ||||
if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { | if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { | ||||
@@ -1703,7 +1704,7 @@ Status ModelManager::LaunchKernelCheckAicpuOp(std::vector<std::string> &aicpu_op | |||||
for (uint32_t i = 0; i < res_op_nums; i++) { | for (uint32_t i = 0; i < res_op_nums; i++) { | ||||
ReturnCode ret_code = res_ret_code_list.at(i); | ReturnCode ret_code = res_ret_code_list.at(i); | ||||
SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); | SysOpInfo aicpu_info = res_aicpu_op_info_list.at(i); | ||||
GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%d, ret_code:%d", aicpu_info.opType, | |||||
GELOGI("Not support aicpu op type: %lu, kernel_type:%d, opLen:%lu, ret_code:%d", aicpu_info.opType, | |||||
aicpu_info.kernelsType, aicpu_info.opLen, ret_code); | aicpu_info.kernelsType, aicpu_info.opLen, ret_code); | ||||
std::vector<char> op_name; | std::vector<char> op_name; | ||||
op_name.clear(); | op_name.clear(); |
@@ -14,20 +14,13 @@ | |||||
* limitations under the License. | * limitations under the License. | ||||
*/ | */ | ||||
#include "graph/load/new_model_manager/model_utils.h" | |||||
#include "graph/load/model_manager/model_utils.h" | |||||
#include <string> | #include <string> | ||||
#include "common/debug/log.h" | #include "common/debug/log.h" | ||||
#include "common/op/ge_op_utils.h" | #include "common/op/ge_op_utils.h" | ||||
#include "graph/debug/ge_attr_define.h" | |||||
#include "graph/utils/attr_utils.h" | |||||
#include "graph/utils/tensor_utils.h" | #include "graph/utils/tensor_utils.h" | ||||
#include "runtime/base.h" | |||||
#include "runtime/kernel.h" | |||||
#include "framework/common/debug/ge_log.h" | |||||
#include "graph/manager/graph_var_manager.h" | #include "graph/manager/graph_var_manager.h" | ||||
#include "graph/types.h" | |||||
#define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | #define VALIDATE_MEM_RANGE(OP, SIZE, OFFSET) \ | ||||
do { \ | do { \ | ||||
@@ -342,13 +335,13 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
int64_t input_offset = v_input_offset[non_const_index]; | int64_t input_offset = v_input_offset[non_const_index]; | ||||
non_const_index++; | non_const_index++; | ||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(input_offset), | ||||
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, input_offset - model_param.logic_var_base); | |||||
uint8_t *variable_addr = model_param.var_base + input_offset - model_param.logic_var_base; | |||||
uint8_t *variable_addr = nullptr; | |||||
GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, input_offset, variable_addr), return {}); | |||||
v_input_data_addr.push_back(variable_addr); | v_input_data_addr.push_back(variable_addr); | ||||
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | GELOGI("[IMAS]GetInputDataAddrs graph_%u type[V] name[%s] input[%lu] memaddr[%p]", | ||||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | ||||
continue); | continue); | ||||
int64_t mem_type; | int64_t mem_type; | ||||
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | ||||
// feature maps | // feature maps | ||||
@@ -382,6 +375,34 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co | |||||
/// | /// | ||||
/// @ingroup ge | /// @ingroup ge | ||||
/// @brief Get variable address. | |||||
/// @return Status | |||||
/// | |||||
Status ModelUtils::GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset, | |||||
uint8_t *&var_addr) { | |||||
rtMemType_t mem_type = ge::VarManager::Instance(model_param.session_id)->GetVarMemType(offset); | |||||
switch (mem_type) { | |||||
case RT_MEMORY_RDMA_HBM: | |||||
if (offset < 0) { | |||||
GELOGE(PARAM_INVALID, "rdma var addr is invalid, addr=%p", reinterpret_cast<uint8_t *>(offset)); | |||||
return PARAM_INVALID; | |||||
} | |||||
var_addr = reinterpret_cast<uint8_t *>(offset); | |||||
break; | |||||
case RT_MEMORY_HBM: | |||||
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, offset - model_param.logic_var_base); | |||||
var_addr = model_param.var_base + offset - model_param.logic_var_base; | |||||
break; | |||||
default: | |||||
GELOGE(PARAM_INVALID, "unsupported memory type %u", mem_type); | |||||
return PARAM_INVALID; | |||||
} | |||||
GE_CHECK_NOTNULL(var_addr); | |||||
return SUCCESS; | |||||
} | |||||
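The key point of GetVarAddr is that the two memory types interpret the stored offset differently; a self-contained sketch with illustrative names (not the GE API):

#include <cstdint>

// RDMA HBM: the stored offset is already an absolute device address
//           (the real change additionally rejects negative offsets).
// Plain HBM: the stored offset is logical and is rebased onto var_base.
uint8_t *ResolveVarAddr(bool is_rdma, int64_t offset,
                        uint8_t *var_base, uint64_t logic_var_base) {
  if (is_rdma) {
    return reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(offset));
  }
  return var_base + (offset - static_cast<int64_t>(logic_var_base));
}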
/// | |||||
/// @ingroup ge | |||||
/// @brief Get output data address. | /// @brief Get output data address. | ||||
/// @return vector<void*> | /// @return vector<void*> | ||||
/// | /// | ||||
@@ -404,19 +425,26 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C | |||||
return v_output_data_addr; | return v_output_data_addr; | ||||
} | } | ||||
for (size_t i = 0; i < outputs_size; ++i) { | for (size_t i = 0; i < outputs_size; ++i) { | ||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||||
VALIDATE_MEM_RANGE(op_desc, model_param.var_size, v_output_offset[i] - model_param.logic_var_base); | |||||
uint8_t *variable_addr = model_param.var_base + v_output_offset[i] - model_param.logic_var_base; | |||||
v_output_data_addr.push_back(variable_addr); | |||||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||||
continue); | |||||
const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | const GeTensorDescPtr tensor_desc = op_desc->MutableOutputDesc(i); | ||||
if (tensor_desc == nullptr) { | if (tensor_desc == nullptr) { | ||||
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); | ||||
continue; | continue; | ||||
} | } | ||||
int32_t calc_type = 0; | |||||
bool ret = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_MEMORY_SIZE_CALC_TYPE, calc_type); | |||||
if (ret && (calc_type == static_cast<int32_t>(ge::MemorySizeCalcType::ALWAYS_EMPTY))) { | |||||
GELOGD("%s is an optional output, the address don't need to be saved.", tensor_desc->GetName().c_str()); | |||||
continue; | |||||
} | |||||
GE_IF_BOOL_EXEC(model_param.var_size != 0 && ge::VarManager::Instance(session_id)->IsVarAddr(v_output_offset[i]), | |||||
uint8_t *variable_addr = nullptr; | |||||
GE_CHK_STATUS_EXEC(GetVarAddr(model_param, op_desc, v_output_offset[i], variable_addr), return {}); | |||||
v_output_data_addr.push_back(variable_addr); | |||||
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[V] name[%s] output[%zu] memaddr[%p]", | |||||
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); | |||||
continue); | |||||
int64_t mem_type; | int64_t mem_type; | ||||
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); | ||||
// feature maps | // feature maps |
@@ -21,7 +21,7 @@
 #include "common/ge_inner_error_codes.h"
 #include "common/types.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 #include "graph/utils/tensor_adapter.h"
@@ -107,6 +107,15 @@ class ModelUtils {
   /// @return Status
   ///
   static Status GetRtAddress(const RuntimeParam &model_param, uintptr_t logic_addr, uint8_t *&mem_addr);
+ private:
+  ///
+  /// @ingroup ge
+  /// @brief Get variable address.
+  /// @return Status
+  ///
+  static Status GetVarAddr(const RuntimeParam &model_param, const ConstOpDescPtr &op_desc, int64_t offset,
+                           uint8_t *&var_addr);
 };
 } // namespace ge
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/end_graph_task_info.h"
+#include "graph/load/model_manager/task_info/end_graph_task_info.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace {
 const uint32_t kDumpFlag = 2;
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_END_GRAPH_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class EndGraphTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/event_record_task_info.h"
+#include "graph/load/model_manager/task_info/event_record_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status EventRecordTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_RECORD_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class EventRecordTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/event_wait_task_info.h"
+#include "graph/load/model_manager/task_info/event_wait_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status EventWaitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_EVENT_WAIT_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class EventWaitTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/fusion_start_task_info.h"
+#include "graph/load/model_manager/task_info/fusion_start_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status FusionStartTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_START_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class FusionStartTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/fusion_stop_task_info.h"
+#include "graph/load/model_manager/task_info/fusion_stop_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status FusionStopTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_FUSION_STOP_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class FusionStopTaskInfo : public TaskInfo {
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/hccl_task_info.h"
+#include "graph/load/model_manager/task_info/hccl_task_info.h"
 #include <utility>
 #include "common/opskernel/ops_kernel_info_store.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 namespace ge {
 std::mutex HcclTaskInfo::hccl_follow_stream_mutex_;
@@ -23,7 +23,7 @@
 #include <vector>
 #include "common/opskernel/ge_task_info.h"
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/manager/util/hcom_util.h"
 namespace ge {
 class HcclTaskInfo : public TaskInfo {
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/kernel_ex_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_ex_task_info.h"
 #include <vector>
@@ -24,8 +24,8 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/fmk_error_codes.h"
 #include "graph/attr_value.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_manager.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_manager.h"
 namespace ge {
 Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_KERNEL_EX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/kernel_task_info.h"
+#include "graph/load/model_manager/task_info/kernel_task_info.h"
 #include <map>
 #include <memory>
 #include <string>
@@ -25,9 +25,9 @@
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/l2_cache_optimize.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_manager.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_manager.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "runtime/kernel.h"
 #include "super_kernel/super_kernel.h"
 #include "super_kernel/super_kernel_factory.h"
@@ -22,7 +22,7 @@
 #include <string>
 #include <vector>
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
 class KernelTaskInfo : public TaskInfo {
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/label_goto_ex_task_info.h"
+#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelGotoExTaskInfo : public TaskInfo {
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/label_set_task_info.h"
+#include "graph/load/model_manager/task_info/label_set_task_info.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSetTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/label_switch_by_index_task_info.h"
+#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 constexpr uint8_t kLabelSwitchIndexNum = 1;
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class LabelSwitchByIndexTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/memcpy_addr_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_addr_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace {
 const uint32_t kAlignBytes = 64;
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ADDR_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class MemcpyAddrAsyncTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/memcpy_async_task_info.h"
+#include "graph/load/model_manager/task_info/memcpy_async_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MEMCPY_ASYNC_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/model_exit_task_info.h"
+#include "graph/load/model_manager/task_info/model_exit_task_info.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ModelExitTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_MODEL_EXIT_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ModelExitTaskInfo : public TaskInfo {
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/profiler_trace_task_info.h"
+#include "graph/load/model_manager/task_info/profiler_trace_task_info.h"
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 namespace ge {
 Status ProfilerTraceTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_PROFILER_TRACE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class ProfilerTraceTaskInfo : public TaskInfo {
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/stream_active_task_info.h"
+#include "graph/load/model_manager/task_info/stream_active_task_info.h"
 #include <vector>
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
+#include "graph/load/model_manager/davinci_model.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_ACTIVE_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamActiveTaskInfo : public TaskInfo {
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/stream_switch_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switch_task_info.h"
 #include <vector>
 #include "framework/common/debug/ge_log.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "graph/debug/ge_attr_define.h"
 namespace ge {
@@ -16,7 +16,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 namespace ge {
 class StreamSwitchTaskInfo : public TaskInfo {
@@ -13,12 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/stream_switchn_task_info.h"
+#include "graph/load/model_manager/task_info/stream_switchn_task_info.h"
 #include <vector>
 #include "framework/common/debug/ge_log.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/davinci_model.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/davinci_model.h"
+#include "graph/load/model_manager/model_utils.h"
 namespace {
 const uint8_t kStreamSwitchnInputNum = 1;
@@ -17,7 +17,7 @@
 #ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
 #define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCHN_TASK_INFO_H_
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include "graph/op_desc.h"
 namespace ge {
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/task_info/task_info.h"
+#include "graph/load/model_manager/task_info/task_info.h"
 #include <vector>
@@ -22,8 +22,8 @@
 #include "cce/customize.h"
 #include "framework/common/taskdown_common.h"
 #include "framework/common/ge_inner_error_codes.h"
-#include "graph/load/new_model_manager/ts_mem_mall.h"
-#include "graph/load/new_model_manager/task_info/task_info_factory.h"
+#include "graph/load/model_manager/ts_mem_mall.h"
+#include "graph/load/model_manager/task_info/task_info_factory.h"
 #include "proto/task.pb.h"
 namespace ge {
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/zero_copy_offset.h"
+#include "graph/load/model_manager/zero_copy_offset.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/model_utils.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 namespace ge {
 namespace {
@@ -25,7 +25,7 @@
 #include "external/ge/ge_api_error_codes.h"
 #include "framework/common/ge_types.h"
 #include "graph/debug/ge_attr_define.h"
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "graph/utils/attr_utils.h"
 #include "graph/utils/tensor_utils.h"
 #include "runtime/mem.h"
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
-#include "graph/load/new_model_manager/zero_copy_task.h"
+#include "graph/load/model_manager/zero_copy_task.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
-#include "graph/load/new_model_manager/model_utils.h"
+#include "graph/load/model_manager/model_utils.h"
 #include "common/ge_compiler_options.h"
 namespace ge {
@@ -92,6 +92,7 @@
 #include "graph/passes/unused_args_clean_pass.h"
 #include "graph/passes/global_step_insert_pass.h"
 #include "graph/passes/memcpy_addr_async_pass.h"
+#include "graph/passes/hccl_memcpy_pass.h"
 #include "graph/build/label_allocator.h"
 #include "graph/utils/tensor_adapter.h"
 #include "inc/pass_manager.h"
@@ -729,9 +730,7 @@ Status GraphManager::PreRunAfterOptimizeSubGraph(const GraphNodePtr &graph_node,
   CompilerStages &stages = GetCompilerStages(graph_node->GetGraphId());
   GM_RUN_AND_DUMP_PERF("OptimizeWholeGraph", stages.optimizer.OptimizeWholeGraph, compute_graph);
   GM_RUN_AND_DUMP_PERF("Optimize2", OptimizeStage2, compute_graph);
-  GM_RUN_AND_DUMP_PERF("OptimizeGraphBeforeBuildForRts",
-                       GetCompilerStages(graph_node->GetGraphId()).optimizer.OptimizeGraphBeforeBuildForRts,
-                       compute_graph);
+  GM_RUN_AND_DUMP_PERF("OptimizeBeforeBuildForRts", stages.optimizer.OptimizeGraphBeforeBuildForRts, compute_graph);
   Status ret = compute_graph->TopologicalSorting();
   if (ret != SUCCESS) {
@@ -2150,6 +2149,8 @@ Status GraphManager::OptimizeStage1(ge::ComputeGraphPtr &compute_graph) {
                                                new (std::nothrow) TransOpWithoutReshapeFusionPass))
   GE_CHK_STATUS_RET(after_merge_passes.AddPass("OptimizeStage1_1::TransOpBreadthFusionPass",
                                                new (std::nothrow) TransOpBreadthFusionPass))
+  GE_CHK_STATUS_RET(
+      after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) HcclMemcpyPass));
   GE_TIMESTAMP_START(after_merge_passes);
   auto ret = after_merge_passes.Run(compute_graph);
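The AddPass call above follows the usual GE idiom of handing the pass manager a raw pointer created with new (std::nothrow), with GE_CHK_STATUS_RET turning a failed registration into an early return. Below is a toy mock of that ownership contract, assuming nothing beyond what the two added lines show; PassManager, GraphPass and DummyHcclMemcpyPass here are stand-ins, not the real GE classes.

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    struct Graph {};

    // Stand-in for a graph pass; the real HcclMemcpyPass rewrites the graph around HCCL nodes.
    struct GraphPass {
      virtual ~GraphPass() = default;
      virtual int Run(Graph &graph) = 0;
    };

    struct DummyHcclMemcpyPass : GraphPass {
      int Run(Graph &) override { return 0; }
    };

    class PassManager {
     public:
      // Takes ownership of `pass`; a nullptr (e.g. from a failed new (std::nothrow)) is rejected.
      int AddPass(const std::string &name, GraphPass *pass) {
        if (pass == nullptr) return -1;
        passes_.emplace_back(name, std::unique_ptr<GraphPass>(pass));
        return 0;
      }
      int Run(Graph &graph) {
        for (auto &named_pass : passes_) {
          int ret = named_pass.second->Run(graph);
          if (ret != 0) return ret;
        }
        return 0;
      }

     private:
      std::vector<std::pair<std::string, std::unique_ptr<GraphPass>>> passes_;
    };

    int main() {
      PassManager after_merge_passes;
      if (after_merge_passes.AddPass("OptimizeStage1_1::HcclMemcpyPass", new (std::nothrow) DummyHcclMemcpyPass) != 0) {
        return -1;  // mirrors what GE_CHK_STATUS_RET would do on registration failure
      }
      Graph graph;
      std::printf("run result: %d\n", after_merge_passes.Run(graph));
      return 0;
    }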
@@ -2776,7 +2777,7 @@ Status GraphManager::ParseInputsDimsForGetNexNosinkAndData(const vector<NodePtr>
     }
     GetLocalOmgContext().user_real_input_dims.emplace_back(input_tensor.at(index).dims);
-    GELOGI("Shape dims of %d data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
+    GELOGI("Shape dims of %zu data is %s.", index, formats::JoinToString(input_tensor.at(index).dims).c_str());
   }
   return SUCCESS;
 }
@@ -3121,9 +3122,8 @@ Status GraphManager::Build(const GraphNodePtr &graph_node, ComputeGraphPtr &comp
     graph_name.append(std::to_string(graph_node->GetGraphId()));
     compute_graph->SetName(graph_name);
   }
-  std::vector<SubGraphInfoPtr> sub_graph_list;
-  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, sub_graph_list, ge_root_model,
-                                                                       session_id);
+  auto ret = GetCompilerStages(graph_node->GetGraphId()).builder.Build(compute_graph, ge_root_model, session_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "SubGraph build Failed.");
     return ret;